From 3a1c5b537aa2cc3655d67fcd487c858f848a58ac Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 6 Feb 2024 20:14:12 +0100 Subject: [PATCH] SpecFit updates for improvements (#34) * changed optimization * removed plots, added typedtables * changed auto calibration to more stables * new cut truncated fits * updated aoe calibration for dep cut gen * updated package * qc for energy calibration and aoe tuning * new routines for simple_calibration * more flexible peak fitting * removed types for keywords * new auto calib for peak splits * new ctc routines * updated filter optimization routine * updated qc cuts for sg and energy * renamed filter optimization * updated specfits * updated aoe calibration * Update BAT version bound * STASH * New recipes * AoE fit update * Add all e filter cuts * Added calbrate energy * STASH * New qc * STASH * Drop remnants of Julia no iterative fit * fwhm uncertainty with covmat * function to sample test data from model, run test doesnt need to read external data anymore * add distribution package * fix pull request issue, forgot a plot * add fast flatten * add norm. residuals to fit results + cosmetic improvements * Fixed compat * Fix docstring * Increased LegendDataManagement package version * Cleaned dosctrings * Changed some formatting * Apply suggestions from code review remove unnecessary packages from Project.toml Co-authored-by: Florian * Apply suggestions from code review remove unnecessary exports Co-authored-by: Florian * Fixed compat * Fix docstring * Increased LegendDataManagement package version * add fast flatten * add norm. residuals to fit results + cosmetic improvements * Fixed deps * Fixed specfit test * Bug Fix Project.toml --------- Co-authored-by: Oliver Schulz Co-authored-by: Felix Hagemann Co-authored-by: LisaSchlueter Co-authored-by: Lisa Schlueter <141262803+LisaSchlueter@users.noreply.github.com> --- Project.toml | 9 +- ext/LegendSpecFitsRecipesBaseExt.jl | 162 ++++++++++++---- src/LegendSpecFits.jl | 34 ++-- src/aoe_calibration.jl | 192 +++++++++++-------- src/aoefit.jl | 284 ++++++++++++++-------------- src/auto_calibration.jl | 25 ++- src/ctc.jl | 33 +++- src/cut.jl | 81 +++++++- src/filter_optimization.jl | 240 +++++++++++++++++++++++ src/gof.jl | 176 +++++++++++++++++ src/optimization.jl | 132 ------------- src/qc.jl | 69 +++++++ src/simple_calibration.jl | 9 +- src/singlefit.jl | 78 +++++++- src/specfit.jl | 131 ++++++++----- src/utils.jl | 75 +++++++- test/Project.toml | 4 + test/test_specfit.jl | 16 +- test/test_utils.jl | 62 ++++++ 19 files changed, 1333 insertions(+), 479 deletions(-) create mode 100644 src/filter_optimization.jl create mode 100644 src/gof.jl delete mode 100644 src/optimization.jl create mode 100644 src/qc.jl create mode 100644 test/test_utils.jl diff --git a/Project.toml b/Project.toml index 10cd4d02..b2a82755 100644 --- a/Project.toml +++ b/Project.toml @@ -9,22 +9,23 @@ BAT = "c0cd4b16-88b7-57fa-983b-ab80aecada7e" ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6" +LegendDataManagement = 
"9feedd95-f0e0-423f-a8dc-de0970eae6b3" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearRegression = "92481ed7-9fb7-40fd-80f2-46fd0f076581" LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" +Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" Optim = "429524aa-4258-5aef-a3af-852621145aeb" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" PropDicts = "4dc08600-4268-439e-8673-d706fafbb426" RadiationSpectra = "4f207c7e-01da-51d7-a1a0-c8c06dd1d883" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" -Requires = "ae029012-a4dd-5104-9daa-d747884805df" Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" @@ -55,16 +56,16 @@ ForwardDiff = "0.10" IntervalSets = "0.7" InverseFunctions = "0.1" IrrationalConstants = "0.1, 0.2" +LegendDataManagement = "0.2.7" LinearAlgebra = "1" LinearRegression = "0.2" LsqFit = "0.14, 0.15" +Measurements = "2" Optim = "1" -Plots = "1" PropDicts = "0.2" RadiationSpectra = "0.5" Random = "1" RecipesBase = "1" -Requires = "1" Roots = "2" SnoopPrecompile = "1" SpecialFunctions = "0.10, 1, 2" diff --git a/ext/LegendSpecFitsRecipesBaseExt.jl b/ext/LegendSpecFitsRecipesBaseExt.jl index 8b7526ad..591c59a7 100644 --- a/ext/LegendSpecFitsRecipesBaseExt.jl +++ b/ext/LegendSpecFitsRecipesBaseExt.jl @@ -12,17 +12,16 @@ using StatsBase, LinearAlgebra legend := :bottomright @series begin seriestype := :histogram - # y := report.f_fit.(x) - bins --> 2000 + bins --> :fd + # bins --> 2000 normalize --> :pdf label := "Data" - # label := @sprintf("μ = %s ± %s\nσ = %s ± %s\nn = %d", report.μ, report.μ_err, report.σ, report.σ_err, report.n) x[x .> cuts.low .&& x .< cuts.high] # x end @series begin color := :red - label := format("Normal Fit (μ = ({:.2f} ± {:.2f})µs, σ = ({:.2f} ± {:.2f})µs", ustrip.([report.μ, report.μ_err, report.σ, report.σ_err])...) + label := format("Normal Fit (μ = ({:.2f} ± {:.2f}), σ = ({:.2f} ± {:.2f})", ustrip.([report.μ, report.μ_err, report.σ, report.σ_err])...) lw := 3 ustrip(cuts.low):0.00001:ustrip(cuts.high), t -> report.f_fit(t) end @@ -66,8 +65,8 @@ end gridlinewidth := 0.5 # xscale := :log10 # yscale := :log10 - ylims := (1, 5) - xlims := (1, 5) + ylims := (1, 8) + xlims := (0.5, 5) @series begin seriestype := :scatter u"µs", NoUnits @@ -86,41 +85,68 @@ end end end -@recipe function f(report::NamedTuple{(:v, :h, :f_fit, :f_sig, :f_lowEtail, :f_bck)}; show_label::Bool) +@recipe function f(report:: NamedTuple{(:wl, :min_sf, :min_sf_err, :a_grid_wl_sg, :sfs, :sfs_err)}) + xlabel := "Window Length (ns)" + ylabel := "SEP Surrival Fraction (%)" + grid := :true + gridcolor := :black + gridalpha := 0.2 + gridlinewidth := 0.5 + ylims := (0, 30) + @series begin + seriestype := :scatter + label := "SF" + yerror --> report.sfs_err + ustrip.(report.a_grid_wl_sg), report.sfs + end + @series begin + seriestype := :hline + label := "Min. 
SF (WT: $(report.wl))" + color := :red + linewidth := 2.5 + [report.min_sf] + end +end + +@recipe function f(report::NamedTuple{(:v, :h, :f_fit, :f_sig, :f_lowEtail, :f_bck)}; show_label=true, show_fit=true) xlabel := "Energy (keV)" ylabel := "Counts" legend := :bottomright yscale := :log10 - ylims := (1, 1.2*report.f_sig(report.v.μ)) + ylim_max = max(5*report.f_sig(report.v.μ), 5*maximum(report.h.weights)) + ylim_max = ifelse(ylim_max == 0.0, 1e5, ylim_max) + ylims := (1, ylim_max) @series begin seriestype := :stepbins label := ifelse(show_label, "Data", "") bins --> :sqrt LinearAlgebra.normalize(report.h, mode = :density) end - @series begin - seriestype := :line - label := ifelse(show_label, "Best Fit", "") - color := :red - minimum(report.h.edges[1]):0.1:maximum(report.h.edges[1]), report.f_fit - end - @series begin - seriestype := :line - label := ifelse(show_label, "Signal", "") - color := :green - minimum(report.h.edges[1]):0.1:maximum(report.h.edges[1]), report.f_sig - end - @series begin - seriestype := :line - label := ifelse(show_label, "Low Energy Tail", "") - color := :blue - minimum(report.h.edges[1]):0.1:maximum(report.h.edges[1]), report.f_lowEtail - end - @series begin - seriestype := :line - label := ifelse(show_label, "Background", "") - color := :black - minimum(report.h.edges[1]):0.1:maximum(report.h.edges[1]), report.f_bck + if show_fit + @series begin + seriestype := :line + label := ifelse(show_label, "Best Fit", "") + color := :red + minimum(report.h.edges[1]):0.1:maximum(report.h.edges[1]), report.f_fit + end + @series begin + seriestype := :line + label := ifelse(show_label, "Signal", "") + color := :green + minimum(report.h.edges[1]):0.1:maximum(report.h.edges[1]), report.f_sig + end + @series begin + seriestype := :line + label := ifelse(show_label, "Low Energy Tail", "") + color := :blue + minimum(report.h.edges[1]):0.1:maximum(report.h.edges[1]), report.f_lowEtail + end + @series begin + seriestype := :line + label := ifelse(show_label, "Background", "") + color := :black + minimum(report.h.edges[1]):0.1:maximum(report.h.edges[1]), report.f_bck + end end end @@ -182,7 +208,7 @@ end end end -@recipe function f(report::NamedTuple{(:h_calsimple, :h_uncal, :c, :fep_guess, :peakhists, :peakstats)}; cal::Bool) +@recipe function f(report::NamedTuple{(:h_calsimple, :h_uncal, :c, :fep_guess, :peakhists, :peakstats)}; cal=true) ylabel := "Counts" legend := :topright yscale := :log10 @@ -216,7 +242,7 @@ end end end -@recipe function f(report_ctc::NamedTuple{(:peak, :window, :fct, :bin_width, :bin_width_qdrift, :e_peak, :e_ctc, :qdrift_peak, :h_before, :h_after)}) +@recipe function f(report_ctc::NamedTuple{(:peak, :window, :fct, :bin_width, :bin_width_qdrift, :e_peak, :e_ctc, :qdrift_peak, :h_before, :h_after, :fwhm_before, :fwhm_after, :err, :report_before, :report_after)}) layout := (2,2) thickness_scaling := 2.0 size := (2400, 1600) @@ -252,10 +278,33 @@ end label := "Before CTC" xlabel := "Energy (keV)" ylabel := "Counts" + title := "FWHM $(round(report_ctc.fwhm_before, digits=2))±$(round(report_ctc.err.fwhm_before, digits=2))keV" yscale := :log10 subplot := 3 report_ctc.h_before end + # @series begin + # # seriestype := :stepbins + # color := :red + # # label := "Before CTC" + # # xlabel := "Energy (keV)" + # # ylabel := "Counts" + # # yscale := :log10 + # subplot := 3 + # # report_ctc.h_before + # minimum(report_ctc.e_peak):0.001:maximum(report_ctc.e_peak), t -> report_ctc.report_before.f_fit(t) + # end + # @series begin + # # seriestype := :stepbins + # color := 
:red + # # label := "Before CTC" + # # xlabel := "Energy (keV)" + # # ylabel := "Counts" + # # yscale := :log10 + # subplot := 4 + # # report_ctc.h_before + # minimum(report_ctc.e_peak):0.001:maximum(report_ctc.e_peak), t -> report_ctc.report_after.f_fit(t) + # end @series begin seriestype := :stepbins color := :red @@ -272,11 +321,58 @@ end label := "After CTC" xlabel := "Energy (keV)" ylabel := "Counts" + title := "FWHM $(round(report_ctc.fwhm_after, digits=2))±$(round(report_ctc.err.fwhm_after, digits=2))keV" yscale := :log10 subplot := 4 report_ctc.h_after end end +@recipe function f(report_window_cut::NamedTuple{(:h, :f_fit, :x_fit, :low_cut, :high_cut, :low_cut_fit, :high_cut_fit, :center, :σ)}) + xlims := (ustrip(report_window_cut.center - 5*report_window_cut.σ), ustrip(report_window_cut.center + 5*report_window_cut.σ)) + @series begin + seriestype := :barbins + alpha := 0.5 + label := "Data" + report_window_cut.h + end + @series begin + seriestype := :line + label := "Best Fit" + color := :red + linewidth := 3 + report_window_cut.x_fit, report_window_cut.f_fit + end + @series begin + seriestype := :vline + label := "Cut Window" + color := :green + linewidth := 3 + ustrip.([report_window_cut.low_cut, report_window_cut.high_cut]) + end + # @series begin + # seriestype := :vline + # label := "Center" + # color := :blue + # linewidth := 3 + # ustrip.([report_window_cut.center]) + # end + @series begin + seriestype := :vline + label := "Fit Window" + color := :orange + linewidth := 3 + ustrip.([report_window_cut.low_cut_fit, report_window_cut.high_cut_fit]) + end + @series begin + seriestype := :vspan + label := "" + color := :lightgreen + alpha := 0.2 + ustrip.([report_window_cut.low_cut, report_window_cut.high_cut]) + end + +end + end # module LegendSpecFitsRecipesBaseExt \ No newline at end of file diff --git a/src/LegendSpecFits.jl b/src/LegendSpecFits.jl index 2196637e..2ee6a3af 100644 --- a/src/LegendSpecFits.jl +++ b/src/LegendSpecFits.jl @@ -13,25 +13,27 @@ using Random using ArgCheck using ArraysOfArrays +using BAT using Distributions using FillArrays +using ForwardDiff +using IntervalSets using InverseFunctions using IrrationalConstants +using LegendDataManagement +using LinearRegression +using LsqFit +using Optim +using PropDicts using RadiationSpectra +using Roots using SpecialFunctions using StatsBase using StructArrays using Tables +using TypedTables using Unitful using ValueShapes -using IntervalSets -using Roots -using BAT -using LsqFit -using Optim -using ForwardDiff -using LinearRegression -using PropDicts include("utils.jl") include("peakshapes.jl") @@ -39,7 +41,7 @@ include("likelihoods.jl") include("priors.jl") include("cut.jl") include("aoefit.jl") -include("optimization.jl") +include("filter_optimization.jl") include("singlefit.jl") include("specfit.jl") include("fwhm.jl") @@ -48,18 +50,8 @@ include("auto_calibration.jl") include("aoe_calibration.jl") include("specfit_combined.jl") include("ctc.jl") - -# @static if !isdefined(Base, :get_extension) -# using Requires -# include("../ext/LegendSpecFitsRecipesBaseExt.jl") -# end - +include("qc.jl") +include("gof.jl") include("precompile.jl") -function __init__() - @static if !isdefined(Base, :get_extension) - @require BAT = "c0cd4b16-88b7-57fa-983b-ab80aecada7e" include("../ext/LegendSpecFitsBATExt.jl") - end -end - end # module diff --git a/src/aoe_calibration.jl b/src/aoe_calibration.jl index 199cc85b..354016e7 100644 --- a/src/aoe_calibration.jl +++ b/src/aoe_calibration.jl @@ -1,102 +1,112 @@ -f_aoe_sigma(x, p) = 
p[1] .+ p[2]*exp.(-p[3]./x) +# f_aoe_sigma(x, p) = p[1] .+ p[2]*exp.(-p[3]./x) +@. f_aoe_sigma(x, p) = sqrt(abs(p[1]) + abs(p[2])/x^2) +f_aoe_mu(x, p) = p[1] .+ p[2]*x """ fit_aoe_corrections(e::Array{<:Real}, μ::Array{T}, σ::Array{T}) where T<:Real Fit the corrections for the AoE value of the detector. +# Returns +- `e`: Energy values +- `μ`: Mean values +- `σ`: Sigma values +- `μ_scs`: Fit result for the mean values +- `f_μ_scs`: Fit function for the mean values +- `σ_scs`: Fit result for the sigma values +- `f_σ_scs`: Fit function for the sigma values +- `err`: Uncertainties """ function fit_aoe_corrections(e::Array{<:Real}, μ::Array{T}, σ::Array{T}) where T<:Real # fit compton band mus with linear function μ_cut = (mean(μ) - 2*std(μ) .< μ .&& μ .< mean(μ) + 2*std(μ)) - μ_scs = linregress(e[μ_cut], μ[μ_cut]) + e, μ, σ = e[μ_cut], μ[μ_cut], σ[μ_cut] + # μ_scs = linregress(e[μ_cut], μ[μ_cut]) + μ_scs = linregress(e, μ) μ_scs_slope, μ_scs_intercept = LinearRegression.slope(μ_scs)[1], LinearRegression.bias(μ_scs)[1] + μ_scs = curve_fit(f_aoe_mu, e, μ, [μ_scs_intercept, μ_scs_slope]) + μ_scs_err = stderror(μ_scs) @debug "μ_scs_slope : $μ_scs_slope" @debug "μ_scs_intercept: $μ_scs_intercept" # fit compton band sigmas with exponential function - σ_scs = curve_fit(f_aoe_sigma, e, σ, [0.0, maximum(σ), 5.0]) + σ_scs = curve_fit(f_aoe_sigma, e, σ, [median(σ)^2, 1e-5]) + σ_scs_err = stderror(σ_scs) @debug "σ_scs offset: $(σ_scs.param[1])" @debug "σ_scs shift : $(σ_scs.param[2])" - @debug "σ_scs phase : $(σ_scs.param[3])" ( - e = e[μ_cut], - μ = μ[μ_cut], + e = e, + μ = μ, + μ_scs = μ_scs.param, f_μ_scs = x -> μ_scs_slope * x + μ_scs_intercept, - μ_scs_slope = μ_scs_slope, - μ_scs_intercept = μ_scs_intercept, - σ = σ[μ_cut], - σ_scs = σ_scs.param, - f_σ_scs = x -> Base.Fix2(f_aoe_sigma, σ_scs.param)(x) + σ = σ, + σ_scs = abs.(σ_scs.param), + f_σ_scs = x -> Base.Fix2(f_aoe_sigma, σ_scs.param)(x), + err = (σ_scs = σ_scs_err, + μ_scs = μ_scs_err + ) ) end export fit_aoe_corrections """ - correctAoE!(aoe::Array{T}, e::Array{T}, aoe_corrections::NamedTuple{(:e, :μ, :f_μ_scs, :μ_scs_slope, :μ_scs_intercept, :σ, :σ_scs, :f_σ_scs)}) where T<:Real + correctaoe!(aoe::Array{T}, e::Array{T}, aoe_corrections::NamedTuple) where T<:Real Correct the AoE values in the `aoe` array using the corrections in `aoe_corrections`. """ -function correct_aoe!(aoe::Array{T}, e::Array{T}, aoe_corrections::NamedTuple{(:e, :μ, :f_μ_scs, :μ_scs_slope, :μ_scs_intercept, :σ, :σ_scs, :f_σ_scs)}) where T<:Real - aoe ./= aoe_corrections.f_μ_scs.(e) +function correct_aoe!(aoe::Array{T}, e::Array{T}, aoe_corrections::NamedTuple) where T<:Real + aoe ./= Base.Fix2(f_aoe_mu, aoe_corrections.μ_scs).(e) aoe .-= 1.0 - aoe ./= aoe_corrections.f_σ_scs.(e) + aoe ./= Base.Fix2(f_aoe_sigma, aoe_corrections.σ_scs).(e) end export correct_aoe! +function correct_aoe!(aoe::Array{T}, e::Array{T}, aoe_corrections::PropDict) where T<:Real + correct_aoe!(aoe, e, NamedTuple(aoe_corrections)) +end """ - get_aoe_peakhists(aoe::Array{T}, e::Array{T}) where T<:Real + prepare_dep_peakhist(e::Array{T}, dep::T,; relative_cut::T=0.5, n_bins_cut::Int=500) where T<:Real -Get the histograms of the `DEP` in the AoE spectrum. +Prepare an array of uncalibrated DEP energies for parameter extraction and calibration. 
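+# Example
+A minimal usage sketch (illustrative only; `e_uncal` is a hypothetical vector of uncalibrated DEP-region energies, not part of this patch):
+```julia
+result, report = prepare_dep_peakhist(e_uncal, 1592.53)
+e_cal = e_uncal .* result.m_calib  # rough keV scale from the fitted DEP position
+```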
 # Returns
-- `hist`: Histogram of the `DEP` peak
-- `stats`: Stats of the `DEP` peak
-- `dep`: Position of the `DEP` peak
-- `window`: Window size around the `DEP` peak
-- `e`: Energy values in the window around the `DEP` peak
-- `aoe`: AoE values in the window around the `DEP` peak
+- `result`: Result of the initial fit
+- `report`: Report of the initial fit
 """
-function get_dep_peakhists(aoe::Array{T}, e::Array{T}) where T<:Real
-    # DEP line and window
-    dep, window = 1592.53, 25.0
+function prepare_dep_peakhist(e::Array{T}, dep::T,; relative_cut::T=0.5, n_bins_cut::Int=500) where T<:Real
+    # get cut window around peak
+    cuts = cut_single_peak(e, minimum(e), maximum(e); n_bins=n_bins_cut, relative_cut=relative_cut)
+    # estimate bin width
+    bin_width = get_friedman_diaconis_bin_width(e[e .> cuts.low .&& e .< cuts.high])
     # create histogram
-    dephist = fit(Histogram, e, dep-window:0.5:dep+window)
+    dephist = fit(Histogram, e, minimum(e):bin_width:maximum(e))
+    # get peakstats
     depstats = estimate_single_peak_stats(dephist)
-    # set peakstats to known peak position
-    depstats = merge(depstats, (peak_pos = dep, ))
-    result = (
-        hist = dephist,
-        stats = depstats,
-        dep = dep,
-        window = window,
-        e = e[dep-window .< e .< dep+window],
-        aoe = aoe[dep-window .< e .< dep+window]
-    )
-    return result
+    # initial fit for calibration and parameter extraction
+    result, report = fit_single_peak_th228(dephist, depstats,; uncertainty=true, low_e_tail=false)
+    # get calibration estimate from peak position
+    result = merge(result, (m_calib = dep / result.μ, ))
+    return result, report
 end
-export get_dep_peakhists
+export prepare_dep_peakhist
 
 """
-    get_n_after_psd_cut(psd_cut::T, aoe::Array{T}, e::Array{T}, peak::T, window::T,; uncertainty=true) where T<:Real
+    get_n_after_psd_cut(psd_cut::T, aoe::Array{T}, e::Array{T}, peak::T, window::Array{T}, bin_width::T, result_before::NamedTuple, peakstats::NamedTuple; uncertainty=true) where T<:Real
 
-Get the number of counts after a cut value `psd_cut` for a given `peak` and `window` size whiile performing a peak fit with fixed position.
-
- # Returns
+Get the number of counts after a PSD cut value `psd_cut` for a given `peak` and `window` size while performing a peak fit with a fixed position. The number of counts is determined by fitting the peak with a pseudo prior for the peak position.
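+# Example
+An illustrative call (all inputs hypothetical; `result_dep` and `depstats` would come from a prior fit of the uncut DEP peak):
+```julia
+res = get_n_after_psd_cut(-0.5, aoe, e, 1592.53, [12.0, 10.0], 0.1, result_dep, depstats; uncertainty=true)
+res.n, res.n_err  # signal counts surviving the cut and their uncertainty
+```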
+# Returns
 - `n`: Number of counts after the cut
 - `n_err`: Uncertainty of the number of counts after the cut
 """
-function get_n_after_psd_cut(psd_cut::T, aoe::Array{T}, e::Array{T}, peak::T, window::T,; uncertainty=true) where T<:Real
+function get_n_after_psd_cut(psd_cut::T, aoe::Array{T}, e::Array{T}, peak::T, window::Array{T}, bin_width::T, result_before::NamedTuple, peakstats::NamedTuple; uncertainty::Bool=true, fixed_position::Bool=true) where T<:Real
     # get energy after cut and create histogram
-    peakhist = fit(Histogram, e[aoe .> psd_cut], peak-window:0.5:peak+window)
-    # estimate peak stats
-    peakstats = estimate_single_peak_stats(peakhist)
-    # set peakstats to known peak position
-    peakstats = merge(peakstats, (peak_pos = peak, ))
+    peakhist = fit(Histogram, e[aoe .> psd_cut], peak-first(window):bin_width:peak+last(window))
+    # create pseudo_prior with known peak sigma in signal for more stable fit
+    pseudo_prior = NamedTupleDist(σ = Normal(result_before.σ, 0.3), )
     # fit peak and return number of signal counts
-    result, _ = fit_single_peak_th228(peakhist, peakstats, uncertainty=uncertainty, fixed_position=true)
+    result, _ = fit_single_peak_th228(peakhist, peakstats,; uncertainty=uncertainty, fixed_position=fixed_position, low_e_tail=false, pseudo_prior=pseudo_prior)
     if uncertainty
         n, n_err = result.n, result.err.n
     else
@@ -108,33 +118,48 @@ export get_n_after_psd_cut
 
 """
-    get_psd_cut(aoe::Array{T}, e::Array{T},; dep_sf::Float64=0.9) where T<:Real
+    get_psd_cut(aoe::Array{T}, e::Array{T},; dep::T=1592.53, window::Array{T}=[12.0, 10.0], dep_sf::Float64=0.9, cut_search_interval::Tuple{T,T}=(-25.0, 0.0), rtol::T=0.001) where T<:Real
 
-Get the PSD cut value for a given `DEP` surrival fraction `dep_sf` (Default: 90%). The cut value is determined by finding the cut value where the number of counts after the cut is `dep_sf` of the number of counts before the cut.
-The algorithm is based on a root search function and expecting a Bisection.
+Get the PSD cut value for a given `dep` and `window` size while performing a peak fit with fixed position. The PSD cut value is determined by finding the cut value for which the number of counts after the cut equals `dep_sf` times the number of counts before the cut.
+The algorithm uses a bisection root search to find the cut value to within a relative tolerance of `rtol`.
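+# Example
+A hedged sketch (assumes `aoe` has already been corrected for its energy dependence and `e` is calibrated in keV):
+```julia
+psd = get_psd_cut(aoe, e; dep=1592.53, window=[12.0, 10.0], dep_sf=0.9)
+aoe_pass = aoe .> psd.cut  # events surviving the 90% DEP-survival cut
+```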
 # Returns
 - `cut`: PSD cut value
 - `n0`: Number of counts before the cut
-- `n90`: Number of counts after the cut
+- `nsf`: Number of counts after the cut
+- `err`: Uncertainties
 """
-function get_psd_cut(aoe::Array{T}, e::Array{T},; dep_sf::Float64=0.9) where T<:Real
-    # generate DEP peak histogram
-    dep_peakhist = get_dep_peakhists(aoe, e)
+function get_psd_cut(aoe::Array{T}, e::Array{T},; dep::T=1592.53, window::Array{T}=[12.0, 10.0], dep_sf::Float64=0.9, cut_search_interval::Tuple{T,T}=(-25.0, 0.0), rtol::T=0.001, bin_width_window::T=3.0, fixed_position::Bool=true, sigma_high_sided::T=NaN) where T<:Real
+    # estimate bin width
+    bin_width = get_friedman_diaconis_bin_width(e[e .> dep - bin_width_window .&& e .< dep + bin_width_window])
+    # create histogram
+    dephist = fit(Histogram, e, dep-first(window):bin_width:dep+last(window))
+    # get peakstats
+    depstats = estimate_single_peak_stats(dephist)
+    # cut window around peak
+    aoe = aoe[dep-first(window) .< e .< dep+last(window)]
+    e = e[dep-first(window) .< e .< dep+last(window)]
+    # check if a high sided AoE cut should be applied before the PSD cut is generated
+    if !isnan(sigma_high_sided)
+        e = e[aoe .< sigma_high_sided]
+        aoe = aoe[aoe .< sigma_high_sided]
+    end
     # fit before cut
-    result_before, _ = fit_single_peak_th228(dep_peakhist.hist, dep_peakhist.stats, uncertainty=false, fixed_position=true)
+    result_before, _ = fit_single_peak_th228(dephist, depstats,; uncertainty=true, fixed_position=fixed_position, low_e_tail=false)
     # get n0 before cut
-    n90 = result_before.n * 0.9
+    nsf = result_before.n * dep_sf
     # get psd cut
-    n_surrival_dep_f = cut -> get_n_after_psd_cut(cut, dep_peakhist.aoe, dep_peakhist.e, dep_peakhist.dep, dep_peakhist.window,; uncertainty=false).n - n90
-    cut_search_interval = (-25.0, 0.0)
-    psd_cut = find_zero(n_surrival_dep_f, cut_search_interval, Bisection())
-    return (cut = psd_cut, n0 = result_before.n, n90 = n90)
+    n_surrival_dep_f = cut -> get_n_after_psd_cut(cut, aoe, e, dep, window, bin_width, result_before, depstats; uncertainty=false, fixed_position=fixed_position).n - nsf
+    psd_cut = find_zero(n_surrival_dep_f, cut_search_interval, Bisection(), rtol=rtol, maxiters=100)
+    # get nsf after cut
+    result_after = get_n_after_psd_cut(psd_cut, aoe, e, dep, window, bin_width, result_before, depstats; uncertainty=true, fixed_position=fixed_position)
+    return (cut = psd_cut, n0 = result_before.n, nsf = result_after.n, err = (cut = psd_cut * rtol, n0 = result_before.err.n, nsf = result_after.n_err))
 end
 export get_psd_cut
 
 """
-    get_peak_surrival_fraction(aoe::Array{T}, e::Array{T}, peak::T, window::T, psd_cut::T,; uncertainty=true) where T<:Real
+    get_peak_surrival_fraction(aoe::Array{T}, e::Array{T}, peak::T, window::Array{T}, psd_cut::T,; uncertainty::Bool=true, low_e_tail::Bool=true) where T<:Real
 
 Get the survival fraction of a peak after a PSD cut value `psd_cut` for a given `peak` and `window` size while performing a peak fit with fixed position.
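+# Example
+Illustrative only (hypothetical corrected `aoe` and calibrated `e`; `psd.cut` as obtained from `get_psd_cut` above):
+```julia
+result_fep, report_fep = get_peak_surrival_fraction(aoe, e, 2614.5, [25.0, 25.0], psd.cut)
+sf_percent = 100 * result_fep.sf  # FEP survival fraction after the PSD cut
+```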
@@ -145,23 +170,34 @@ Get the surrival fraction of a peak after a PSD cut value `psd_cut` for a given - `sf`: Surrival fraction - `err`: Uncertainties """ -function get_peak_surrival_fraction(aoe::Array{T}, e::Array{T}, peak::T, window::T, psd_cut::T,; uncertainty=true) where T<:Real +function get_peak_surrival_fraction(aoe::Array{T}, e::Array{T}, peak::T, window::Array{T}, psd_cut::T,; uncertainty::Bool=true, low_e_tail::Bool=true, bin_width_window::T=2.0, sigma_high_sided::T=NaN) where T<:Real + # estimate bin width + bin_width = get_friedman_diaconis_bin_width(e[e .> peak - bin_width_window .&& e .< peak + bin_width_window]) # get energy before cut and create histogram - peakhist = fit(Histogram, e, peak-window:0.5:peak+window) + peakhist = fit(Histogram, e, peak-first(window):bin_width:peak+last(window)) # estimate peak stats peakstats = estimate_single_peak_stats(peakhist) - # set peakstats to known peak position - peakstats = merge(peakstats, (peak_pos = peak, )) # fit peak and return number of signal counts - result_before, report_before = fit_single_peak_th228(peakhist, peakstats, uncertainty=uncertainty, fixed_position=true) + result_before, report_before = fit_single_peak_th228(peakhist, peakstats,; uncertainty=uncertainty, low_e_tail=low_e_tail) + + # get e after cut + if !isnan(sigma_high_sided) + e = e[psd_cut .< aoe .< sigma_high_sided] + else + e = e[psd_cut .< aoe] + end + # estimate bin width + bin_width = get_friedman_diaconis_bin_width(e[e .> peak - bin_width_window .&& e .< peak + bin_width_window]) # get energy after cut and create histogram - peakhist = fit(Histogram, e[aoe .> psd_cut], peak-window:0.5:peak+window) + peakhist = fit(Histogram, e, peak-first(window):bin_width:peak+last(window)) + # create pseudo_prior with known peak sigma in signal for more stable fit + pseudo_prior = NamedTupleDist(μ = ConstValueDist(result_before.μ), σ = Normal(result_before.σ, 0.1)) + pseudo_prior = NamedTupleDist(σ = Normal(result_before.σ, 0.1), ) # estimate peak stats peakstats = estimate_single_peak_stats(peakhist) - # set peakstats to known peak position - peakstats = merge(peakstats, (peak_pos = peak, )) # fit peak and return number of signal counts - result_after, report_after = fit_single_peak_th228(peakhist, peakstats, uncertainty=uncertainty, fixed_position=true) + result_after, report_after = fit_single_peak_th228(peakhist, peakstats,; uncertainty=uncertainty, low_e_tail=low_e_tail) + # result_after, report_after = fit_single_peak_th228(peakhist, peakstats,; uncertainty=uncertainty, low_e_tail=low_e_tail, pseudo_prior=pseudo_prior) # calculate surrival fraction sf = result_after.n / result_before.n result = ( @@ -199,14 +235,14 @@ Get the surrival fraction of a peak after a PSD cut value `psd_cut` for a given - `result`: Dict of results for each peak - `report`: Dict of reports for each peak """ -function get_peaks_surrival_fractions(aoe::Array{T}, e::Array{T}, peaks::Array{T}, peak_names::Array{Symbol}, windows::Array{T}, psd_cut::T,; uncertainty=true) where T<:Real +function get_peaks_surrival_fractions(aoe::Array{T}, e::Array{T}, peaks::Array{T}, peak_names::Array{Symbol}, windows::Array{Tuple{T, T}}, psd_cut::T,; uncertainty::Bool=true, bin_width_window::T=2.0, low_e_tail::Bool=true, sigma_high_sided::T=NaN) where T<:Real # create return and result dicts result = Dict{Symbol, NamedTuple}() report = Dict{Symbol, NamedTuple}() # iterate throuh all peaks for (peak, name, window) in zip(peaks, peak_names, windows) # fit peak - result_peak, report_peak = 
get_peak_surrival_fraction(aoe, e, peak, window, psd_cut; uncertainty=uncertainty) + result_peak, report_peak = get_peak_surrival_fraction(aoe, e, peak, collect(window), psd_cut; uncertainty=uncertainty, bin_width_window=bin_width_window, low_e_tail=low_e_tail, sigma_high_sided=sigma_high_sided) # save results result[name] = result_peak report[name] = report_peak @@ -228,11 +264,15 @@ Get the surrival fraction of a continuum after a PSD cut value `psd_cut` for a g - `sf`: Surrival fraction - `err`: Uncertainties """ -function get_continuum_surrival_fraction(aoe::Array{T}, e::Array{T}, center::T, window::T, psd_cut::T,; uncertainty=true) where T<:Real +function get_continuum_surrival_fraction(aoe::Array{T}, e::Array{T}, center::T, window::T, psd_cut::T,; uncertainty=true, sigma_high_sided=sigma_high_sided) where T<:Real # get number of events in window before cut n_before = length(e[center-window .< e .< center+window]) # get number of events after cut n_after = length(e[aoe .> psd_cut .&& center-window .< e .< center+window]) + if !isnan(sigma_high_sided) + n_after = length(e[psd_cut .< aoe .< sigma_high_sided .&& center-window .< e .< center+window]) + end + n_after = length(e[aoe .> psd_cut .&& center-window .< e .< center+window]) # calculate surrival fraction sf = n_after / n_before result = ( diff --git a/src/aoefit.jl b/src/aoefit.jl index 4607cf8a..7628f8c3 100644 --- a/src/aoefit.jl +++ b/src/aoefit.jl @@ -36,10 +36,23 @@ function estimate_single_peak_stats_psd(h::Histogram{T}) where T<:Real peak_pos = (peak_max_pos + peak_mid_pos) / 2.0 peak_fwhm = (E[fwhm_idx_right] - E[fwhm_idx_left]) / 1.0 peak_sigma = peak_fwhm * inv(2*√(2log(2))) + # make sure that peakstats have non-zero sigma and fwhm values to prevent fit priors from being zero + if peak_fwhm == 0 + fwqm_idx_left = findfirst(w -> w >= (first(W) + peak_amplitude) / 4, W) + fwqm_idx_right = findlast(w -> w >= (last(W) + peak_amplitude) / 4, W) + peak_fwqm = (E[fwqm_idx_right] - E[fwqm_idx_left]) / 1.0 + peak_sigma = peak_fwqm * inv(2*√(2log(4))) + peak_fwhm = peak_sigma * 2*√(2log(2)) + end #peak_area = peak_amplitude * peak_sigma * sqrt(2*π) - mean_background = (first(W) + last(W)) / 2 - mean_background = ifelse(mean_background == 0, 0.01, mean_background) - peak_counts = inv(0.761) * (sum(view(W,fwhm_idx_left:fwhm_idx_right)) - mean_background * peak_fwhm) + # mean_background = (first(W) + last(W)) / 2 + # five_sigma_idx_left = findfirst(e -> e >= peak_pos - 5*peak_sigma, E) + three_sigma_idx_left = findfirst(e -> e >= peak_pos - 3*peak_sigma, E) + mean_background = convert(typeof(peak_pos), (sum(view(W, 1:three_sigma_idx_left)))) + mean_background = ifelse(mean_background == 0.0, 100.0, mean_background) + # peak_counts = inv(0.761) * (sum(view(W,fwhm_idx_left:fwhm_idx_right)) - mean_background * peak_fwhm) + # peak_counts = sum(view(W,three_sigma_idx_left:lastindex(W))) / (1 - exp(-3)) + peak_counts = 2*sum(view(W,peak_idx:lastindex(W))) ( peak_pos = peak_pos, @@ -62,21 +75,32 @@ the binning is only done in the area around the peak. 
The peak parameters are es * `peakstats`: StructArray of peak parameters for each compton band * `min_aoe`: Array of minimum A/E values for each compton band * `max_aoe`: Array of maximum A/E values for each compton band + * `mean_peak_pos`: Mean peak position of all compton bands + * `std_peak_pos`: Standard deviation of the peak position of all compton bands + * `simple_pars_aoe_μ`: Simple curve fit parameters for the peak position energy depencence + * `simple_pars_error_aoe_μ`: Simple curve fit parameter errors for the peak position energy depencence + * `simple_pars_aoe_σ`: Simple curve fit parameters for the peak sigma energy depencence + * `simple_pars_error_aoe_σ`: Simple curve fit parameter errors for the peak sigma energy depencence """ function generate_aoe_compton_bands(aoe::Array{<:Real}, e::Array{<:Real}, compton_bands::Array{<:Real}, compton_window::T) where T<:Real # get aoe values in compton bands - aoe_compton_bands = [aoe[(e .> c) .&& (e .< c + compton_window) .&& (aoe .> 0.0)] for c in compton_bands] + aoe_compton_bands = [aoe[c .< e .< c + compton_window .&& aoe .> 0.0] for c in compton_bands] # can constrain data to the area around the peak max_aoe = [quantile(aoe_c, 0.99) + 0.05 for aoe_c in aoe_compton_bands] - min_aoe = [quantile(aoe_c, 0.1) for aoe_c in aoe_compton_bands] + min_aoe = [quantile(aoe_c, 0.2) for aoe_c in aoe_compton_bands] half_quantile_aoe = [quantile(aoe_c, 0.5) for aoe_c in aoe_compton_bands] # Freedman-Diaconis Rule for binning only in the area aroung the peak - bin_width = [2 * (quantile(aoe_c[aoe_c .> half_quantile_aoe[i] .&& aoe_c .< max_aoe[i]], 0.75) - quantile(aoe_c[aoe_c .> half_quantile_aoe[i] .&& aoe_c .< max_aoe[i]], 0.25)) / ∛(length(aoe_c[aoe_c .> half_quantile_aoe[i] .&& aoe_c .< max_aoe[i]])) for (i, aoe_c) in enumerate(aoe_compton_bands)] + # bin_width = [2 * (quantile(aoe_c[aoe_c .> half_quantile_aoe[i] .&& aoe_c .< max_aoe[i]], 0.75) - quantile(aoe_c[aoe_c .> half_quantile_aoe[i] .&& aoe_c .< max_aoe[i]], 0.25)) / ∛(length(aoe_c[aoe_c .> half_quantile_aoe[i] .&& aoe_c .< max_aoe[i]])) for (i, aoe_c) in enumerate(aoe_compton_bands)] + bin_width = [get_friedman_diaconis_bin_width(aoe_c[half_quantile_aoe[i] .< aoe_c .< max_aoe[i]])/2 for (i, aoe_c) in enumerate(aoe_compton_bands)] + # n_bins = [round(Int, (max_aoe[i] - half_quantile_aoe[i]) / get_friedman_diaconis_bin_width(aoe_c[aoe_c .> half_quantile_aoe[i] .&& aoe_c .< max_aoe[i]])) for (i, aoe_c) in enumerate(aoe_compton_bands)] + # cuts = [cut_single_peak(aoe_c, min_aoe[i], max_aoe[i]; n_bins=n_bins[i], relative_cut=0.5) for (i, aoe_c) in enumerate(aoe_compton_bands)] + # cuts = [cut_single_peak(aoe_c, min_aoe[i], max_aoe[i]; n_bins=-1, relative_cut=0.5) for (i, aoe_c) in enumerate(aoe_compton_bands)] + # bin_width = [get_friedman_diaconis_bin_width(aoe_c[cuts[i].low .< aoe_c .< cuts[i].high]) for (i, aoe_c) in enumerate(aoe_compton_bands)] # generate histograms - peakhists = [fit(Histogram, aoe_compton_bands[i], min_aoe[i]:bin_width[i]:max_aoe[i]) for i in eachindex(aoe_compton_bands)] + peakhists = [fit(Histogram, aoe_compton_bands[i], min_aoe[i]:bin_width[i]/2:max_aoe[i]) for i in eachindex(aoe_compton_bands)] # estimate peak parameters peakstats = StructArray(estimate_single_peak_stats_psd.(peakhists)) @@ -94,29 +118,56 @@ function generate_aoe_compton_bands(aoe::Array{<:Real}, e::Array{<:Real}, compto # simple curve fit for parameter extraction simple_fit_aoe_μ = curve_fit(f_aoe_μ, compton_bands[peak_pos_cut], peak_pos[peak_pos_cut], [0.0, mean_peak_pos]) simple_pars_aoe_μ 
= simple_fit_aoe_μ.param - simple_pars_error_aoe_μ = standard_errors(simple_fit_aoe_μ) + simple_pars_error_aoe_μ = zeros(length(simple_pars_aoe_μ)) + try + simple_pars_error_aoe_μ = standard_errors(simple_fit_aoe_μ) + catch e + @warn "Error calculating standard errors for simple fitted μ: $e" + end # estimate peak sigmas energy depencence peak_sigma = peakstats.peak_sigma - mean_peak_sigma_end, std_peak_sigma_end = mean(peak_sigma[20:end]), std(peak_sigma[20:end]) + mean_peak_sigma, std_peak_sigma = mean(peak_sigma[20:end]), std(peak_sigma[20:end]) # simple curve fit for parameter extraction - simple_fit_aoe_σ = curve_fit(f_aoe_σ, compton_bands, peak_sigma, [0.0, 0.0, mean_peak_sigma_end]) + simple_fit_aoe_σ = curve_fit(f_aoe_σ, compton_bands, peak_sigma, [0.0, 0.0, mean_peak_sigma]) simple_pars_aoe_σ = simple_fit_aoe_σ.param - simple_pars_error_aoe_σ = standard_errors(simple_fit_aoe_σ) + simple_pars_error_aoe_σ = zeros(length(simple_pars_aoe_σ)) + try + simple_pars_error_aoe_σ = standard_errors(simple_fit_aoe_σ) + catch e + @warn "Error calculating standard errors for simple fitted σ: $e" + end + + + # Recalculate max_aoe to get rid out high-A/E outliers + max_aoe = peakstats.peak_pos .+ 4 .* abs.(peakstats.peak_sigma) + # Recalculate min_aoe to focus on main peak + min_aoe = peakstats.peak_pos .- 20 .* abs.(peakstats.peak_sigma) + min_3sigma_aoe = peakstats.peak_pos .- 3 .* abs.(peakstats.peak_sigma) + # Freedman-Diaconis Rule for binning only in the area aroung the peak + # bin_width = [get_friedman_diaconis_bin_width(aoe_c[aoe_c .> half_quantile_aoe[i] .&& aoe_c .< max_aoe[i]])/4 for (i, aoe_c) in enumerate(aoe_compton_bands)] + bin_width = [get_friedman_diaconis_bin_width(aoe_c[aoe_c .> min_3sigma_aoe[i] .&& aoe_c .< max_aoe[i]])/4 for (i, aoe_c) in enumerate(aoe_compton_bands)] + + # regenerate histograms + peakhists = [fit(Histogram, aoe_compton_bands[i], min_aoe[i]:bin_width[i]:max_aoe[i]) for i in eachindex(aoe_compton_bands)] + + # reestimate peak parameters + peakstats = StructArray(estimate_single_peak_stats_psd.(peakhists)) ( - peakhists = peakhists, - peakstats = peakstats, - min_aoe = min_aoe, - max_aoe = max_aoe, - mean_peak_pos = mean_peak_pos, - std_peak_pos = std_peak_pos, - simple_pars_aoe_μ = simple_pars_aoe_μ, - simple_pars_error_aoe_μ = simple_pars_error_aoe_μ, - mean_peak_sigma = mean_peak_sigma_end, - std_peak_sigma = std_peak_sigma_end, - simple_pars_aoe_σ = simple_pars_aoe_σ, - simple_pars_error_aoe_σ = simple_pars_error_aoe_σ + ; + peakhists, + peakstats, + min_aoe, + max_aoe, + mean_peak_pos, + std_peak_pos, + simple_pars_aoe_μ, + simple_pars_error_aoe_μ, + mean_peak_sigma, + std_peak_sigma, + simple_pars_aoe_σ, + simple_pars_error_aoe_σ ) end export generate_aoe_compton_bands @@ -132,40 +183,25 @@ Fit the A/E Compton bands using the `f_aoe_compton` function consisting of a gau * `result`: Dict of NamedTuples of the fit results containing values and errors for each compton band * `report`: Dict of NamedTuples of the fit report which can be plotted for each compton band """ -function fit_aoe_compton(peakhists::Array, peakstats::StructArray, compton_bands::Array{T},; pars_aoe::NamedTuple{(:μ, :μ_err, :σ, :σ_err)}) where T<:Real - if isempty(pars_aoe) - # create return and result dicts - result = Dict{T, NamedTuple}() - report = Dict{T, NamedTuple}() - # iterate throuh all peaks - for (i, band) in enumerate(compton_bands) - # get histogram and peakstats - h = peakhists[i] - ps = peakstats[i] - # fit peak - result_band, report_band = fit_single_aoe_compton(h, ps, ; 
uncertainty=false) - # save results - result[band] = result_band - report[band] = report_band - end - return result, report - else - # create return and result dicts - result = Dict{T, NamedTuple}() - report = Dict{T, NamedTuple}() - # iterate throuh all peaks - for (i, band) in enumerate(compton_bands) - # get histogram and peakstats - h = peakhists[i] +function fit_aoe_compton(peakhists::Array, peakstats::StructArray, compton_bands::Array{T},; pars_aoe::NamedTuple{(:μ, :μ_err, :σ, :σ_err)}=NamedTuple{(:μ, :μ_err, :σ, :σ_err)}(nothing, nothing, nothing, nothing), uncertainty::Bool=false) where T<:Real + # create return and result dicts + result = Dict{T, NamedTuple}() + report = Dict{T, NamedTuple}() + # iterate throuh all peaks + for (i, band) in enumerate(compton_bands) + # get histogram and peakstats + h = peakhists[i] + ps = peakstats[i] + if !isnothing(pars_aoe.μ) ps = merge(peakstats[i], (μ = f_aoe_μ(band, pars_aoe.μ), σ = f_aoe_σ(band, pars_aoe.σ))) - # fit peak - result_band, report_band = fit_single_aoe_compton(h, ps, ; uncertainty=false) - # save results - result[band] = result_band - report[band] = report_band end - return result, report + # fit peak + result_band, report_band = fit_single_aoe_compton(h, ps, ; uncertainty=uncertainty) + # save results + result[band] = result_band + report[band] = report_band end + return result, report end export fit_aoe_compton @@ -179,16 +215,37 @@ Perform a fit of the peakshape to the data in `h` using the initial values in `p * `result`: NamedTuple of the fit results containing values and errors * `report`: NamedTuple of the fit report which can be plotted """ -function fit_single_aoe_compton(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background), NTuple{5, T}}; uncertainty::Bool=true) where T<:Real +function fit_single_aoe_compton(h::Histogram, ps::NamedTuple; uncertainty::Bool=true) # create pseudo priors pseudo_prior = NamedTupleDist( - μ = Uniform(ps.peak_pos-3*ps.peak_sigma, ps.peak_pos+3*ps.peak_sigma), - σ = weibull_from_mx(ps.peak_sigma, 5*ps.peak_sigma), - n = weibull_from_mx(ps.peak_counts, 7*ps.peak_counts), - B = weibull_from_mx(ps.mean_background, 5*ps.mean_background), - δ = weibull_from_mx(0.1, 0.5) + μ = Uniform(ps.peak_pos-0.5*ps.peak_sigma, ps.peak_pos+0.5*ps.peak_sigma), + # σ = weibull_from_mx(ps.peak_sigma, 2*ps.peak_sigma), + σ = Uniform(0.95*ps.peak_sigma, 1.05*ps.peak_sigma), + # σ = Normal(ps.peak_sigma, 0.01*ps.peak_sigma), + # n = weibull_from_mx(ps.peak_counts, 1.1*ps.peak_counts), + # n = Normal(ps.peak_counts, 0.5*ps.peak_counts), + # n = Normal(0.9*ps.peak_counts, 0.5*ps.peak_counts), + n = LogUniform(0.01*ps.peak_counts, 5*ps.peak_counts), + # n = Uniform(0.8*ps.peak_counts, 1.2*ps.peak_counts), + # B = weibull_from_mx(ps.mean_background, 1.2*ps.mean_background), + # B = Normal(ps.mean_background, 0.8*ps.mean_background), + B = LogUniform(0.1*ps.mean_background, 10*ps.mean_background), + # B = Uniform(0.8*ps.mean_background, 1.2*ps.mean_background), + # B = Uniform(0.8*ps.mean_background, 1.2*ps.mean_background), + # δ = weibull_from_mx(0.1, 0.8) + δ = LogUniform(0.01, 1.0) ) - + if haskey(ps, :μ) + # create pseudo priors + pseudo_prior = NamedTupleDist( + μ = weibull_from_mx(ps.μ, 2*ps.μ), + σ = weibull_from_mx(ps.σ, 2*ps.σ), + n = weibull_from_mx(ps.peak_counts, 2*ps.peak_counts), + B = weibull_from_mx(ps.mean_background, 2*ps.mean_background), + δ = weibull_from_mx(0.1, 0.8) + ) + end + # transform back to frequency space f_trafo = 
BAT.DistributionTransform(Normal, pseudo_prior) @@ -197,7 +254,7 @@ function fit_single_aoe_compton(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_f # create loglikehood function f_loglike = let f_fit=f_aoe_compton, h=h - v -> hist_loglike(Base.Fix2(f_fit, v), h) + v -> hist_loglike(x -> x in Interval(extrema(h.edges[1])...) ? f_fit(x, v) : 0, h) end # MLE @@ -206,16 +263,38 @@ function fit_single_aoe_compton(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_f # best fit results v_ml = inverse(f_trafo)(Optim.minimizer(opt_r)) - f_loglike_array = let f_fit=f_aoe_compton, h=h - v -> - hist_loglike(x -> f_fit(x, v...), h) + f_loglike_array = let f_fit=aoe_compton_peakshape, h=h + v -> - hist_loglike(x -> x in Interval(extrema(h.edges[1])...) ? f_fit(x, v...) : 0, h) end + # get p-value + mle = f_loglike(v_ml) + + weights_rand = rand(Product(Poisson.(h.weights)), 10000) + + f_loglike_h = let f_fit=f_aoe_compton, v=v_ml + w -> hist_loglike.( + x -> x in Interval(extrema(h.edges[1])...) ? f_fit(x, v) : 0, + fit.(Histogram, Ref(midpoints(h.edges[1])), weights.(w), Ref(h.edges[1])) + ) + end + + mle_rand = f_loglike_h(eachcol(weights_rand)) + + p_value = count(mle_rand .> mle) / length(mle_rand) + if uncertainty # Calculate the Hessian matrix using ForwardDiff H = ForwardDiff.hessian(f_loglike_array, tuple_to_array(v_ml)) - # Calculate the parameter covariance matrix - param_covariance = inv(H) + param_covariance = nothing + if !all(isfinite.(H)) + @warn "Hessian matrix is not finite" + param_covariance = zeros(length(v_ml), length(v_ml)) + else + # Calculate the parameter covariance matrix + param_covariance = inv(H) + end # Extract the parameter uncertainties v_ml_err = array_to_tuple(sqrt.(abs.(diag(param_covariance))), v_ml) @@ -227,7 +306,7 @@ function fit_single_aoe_compton(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_f @debug "n: $(v_ml.n) ± $(v_ml_err.n)" @debug "B: $(v_ml.B) ± $(v_ml_err.B)" - result = merge(v_ml, (err = v_ml_err, )) + result = merge(merge(v_ml, (p_value = p_value, )), (err = v_ml_err, )) else @debug "Best Fit values" @debug "μ: $(v_ml.μ)" @@ -235,7 +314,7 @@ function fit_single_aoe_compton(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_f @debug "n: $(v_ml.n)" @debug "B: $(v_ml.B)" - result = v_ml + result = merge(v_ml, (p_value = p_value, )) end report = ( v = v_ml, @@ -246,72 +325,3 @@ function fit_single_aoe_compton(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_f ) return result, report end - - -# function fit_single_aoe_compton(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background, :μ, :σ), NTuple{7, T}}; uncertainty::Bool=true) where T<:Real -# # create pseudo priors -# pseudo_prior = NamedTupleDist( -# μ = weibull_from_mx(ps.μ, 2*ps.μ), -# σ = weibull_from_mx(ps.σ, 2*ps.σ), -# n = weibull_from_mx(ps.peak_counts, 2*ps.peak_counts), -# B = weibull_from_mx(ps.mean_background, 3*ps.mean_background), -# δ = weibull_from_mx(0.1, 0.8) -# ) - -# # transform back to frequency space -# f_trafo = BAT.DistributionTransform(Normal, pseudo_prior) - -# # start values for MLE -# v_init = mean(pseudo_prior) - -# # create loglikehood function -# f_loglike = let f_fit=f_aoe_compton, h=h -# v -> hist_loglike(Base.Fix2(f_fit, v), h) -# end - -# # MLE -# opt_r = optimize((-) ∘ f_loglike ∘ inverse(f_trafo), f_trafo(v_init)) - -# # best fit results -# v_ml = inverse(f_trafo)(Optim.minimizer(opt_r)) - -# f_loglike_array = let f_fit=f_aoe_compton, h=h -# v -> - hist_loglike(x -> f_fit(x, v...), h) -# end - -# if uncertainty -# # Calculate the 
Hessian matrix using ForwardDiff -# H = ForwardDiff.hessian(f_loglike_array, tuple_to_array(v_ml)) - -# # Calculate the parameter covariance matrix -# param_covariance = inv(H) - -# # Extract the parameter uncertainties -# v_ml_err = array_to_tuple(sqrt.(abs.(diag(param_covariance))), v_ml) - - -# @debug "Best Fit values" -# @debug "μ: $(v_ml.μ) ± $(v_ml_err.μ)" -# @debug "σ: $(v_ml.σ) ± $(v_ml_err.σ)" -# @debug "n: $(v_ml.n) ± $(v_ml_err.n)" -# @debug "B: $(v_ml.B) ± $(v_ml_err.B)" - -# result = merge(v_ml, (err = v_ml_err, )) -# else -# @debug "Best Fit values" -# @debug "μ: $(v_ml.μ)" -# @debug "σ: $(v_ml.σ)" -# @debug "n: $(v_ml.n)" -# @debug "B: $(v_ml.B)" - -# result = v_ml -# end -# report = ( -# v = v_ml, -# h = h, -# f_fit = x -> Base.Fix2(f_aoe_compton, v_ml)(x), -# f_sig = x -> Base.Fix2(f_aoe_sig, v_ml)(x), -# f_bck = x -> Base.Fix2(f_aoe_bkg, v_ml)(x) -# ) -# return result, report -# end diff --git a/src/auto_calibration.jl b/src/auto_calibration.jl index 122e832a..b800bcc5 100644 --- a/src/auto_calibration.jl +++ b/src/auto_calibration.jl @@ -11,18 +11,22 @@ end Compute an energy calibration from raw reconstructed energy deposition values. """ -function autocal_energy(E_raw::AbstractArray{<:Real}) - window_sizes = [25.0] - n_bins = 15000 - th228_lines = [2614.50] +function autocal_energy(E_raw::AbstractArray{<:Real},; quantile_perc::Real=0.995) + # 0.5 keV standard binning cal_hist_binning = 0:0.5:3000 - quantile_perc = 0.995 - result, report = simple_calibration( - E_raw, th228_lines, window_sizes,; - n_bins=n_bins, quantile_perc=quantile_perc, calib_type=:th228 - ) - calib_constant = result.c + # eiher generate FEP guess by quantile or use NaN to use peakfinder + if isnan(quantile_perc) + h_uncal = fit(Histogram, E_raw, 0:1.0:maximum(E_raw)) + _, peakpos = RadiationSpectra.peakfinder(h_uncal, σ=5.0, backgroundRemove=true, threshold=10) + fep_guess = sort(peakpos)[end] + else + fep_guess = quantile(E_raw, quantile_perc) + end + # generate calibratio constant + calib_constant = 2614.5 / fep_guess + # generate calibration function f_calib = Base.Fix1(*, calib_constant * u"keV") + # generate calibrated energy values and calibrated histogram E_cal_keV = ustrip.(f_calib.(E_raw)) cal_hist = fit(Histogram, E_cal_keV, cal_hist_binning) return (result = f_calib, diagnostics = EnergyCalibrationDiagnostics(cal_hist)) @@ -40,4 +44,5 @@ function calibrate_energy!(e::Array{T}, pars::PropDict) where T<:Real e .*= pars.m_calib e .+= pars.n_calib end +export calibrate_energy! diff --git a/src/ctc.jl b/src/ctc.jl index b70fc3c0..dcb0f643 100644 --- a/src/ctc.jl +++ b/src/ctc.jl @@ -36,13 +36,20 @@ of `window`. The drift time dependence is given by `qdrift`. 
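+# Example
+A usage sketch (hypothetical energy vector `e` and drift-time proxy `qdrift`, FEP at 2614.5 keV with an asymmetric window):
+```julia
+result, report = ctc_energy(e, qdrift, 2614.5, (35.0, 25.0))
+e_ctc = e .+ result.fct .* qdrift  # apply the optimized charge-trapping correction
+```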
* `fct`: correction factor * `bin_width`: optimal bin width """ -function ctc_energy(e::Array{T}, qdrift::Array{T}, peak::T, window::T) where T<:Real +function ctc_energy(e::Array{T}, qdrift::Array{T}, peak::T, window::Tuple{T, T}) where T<:Real # create cut window around peak - cut = peak - window .< e .< peak + window + cut = peak - first(window) .< e .< peak + last(window) e_cut, qdrift_cut = e[cut], qdrift[cut] # calculate optimal bin width - bin_width = 2 * (quantile(e_cut, 0.75) - quantile(e_cut, 0.25)) / ∛(length(e_cut)) - bin_width_qdrift = 2 * (quantile(qdrift_cut, 0.75) - quantile(qdrift_cut, 0.25)) / ∛(length(qdrift_cut)) + bin_width = get_friedman_diaconis_bin_width(e[peak - 5 .< e .< peak + 5]) + bin_width_qdrift = get_friedman_diaconis_bin_width(qdrift[peak - 5 .< e .< peak + 5]) + + # get FWHM before correction + # fit peak + h_before = fit(Histogram, e_cut, minimum(e_cut):bin_width:maximum(e_cut)) + ps_before = estimate_single_peak_stats(h_before) + result_before, report_before = fit_single_peak_th228(h_before, ps_before; uncertainty=true) + # create function to minimize f_minimize = let f_optimize=f_optimize_ctc, e=e_cut, qdrift=qdrift_cut, bin_width=bin_width fct -> f_optimize(fct, e, qdrift, bin_width) @@ -53,14 +60,23 @@ function ctc_energy(e::Array{T}, qdrift::Array{T}, peak::T, window::T) where T<: opt_r = optimize(f_minimize, fct_range[1], fct_range[2], fct_start, Fminbox(GradientDescent()), Optim.Options(iterations=1000, show_trace=false, time_limit=600)) # get optimal correction factor fct = Optim.minimizer(opt_r)[1] + # calculate drift time corrected energy e_ctc = e_cut .+ fct .* qdrift_cut + # get FWHM after correction + # fit peak + h_after = fit(Histogram, e_ctc, minimum(e_ctc):bin_width:maximum(e_ctc)) + ps_after = estimate_single_peak_stats(h_after) + result_after, report_after = fit_single_peak_th228(h_after, ps_after; uncertainty=true) result = ( peak = peak, window = window, fct = fct, bin_width = bin_width, bin_width_qdrift = bin_width_qdrift, + fwhm_before = result_before.fwhm, + fwhm_after = result_after.fwhm, + err = (fwhm_before = result_before.err.fwhm, fwhm_after = result_after.err.fwhm) ) report = ( peak = result.peak, @@ -71,8 +87,13 @@ function ctc_energy(e::Array{T}, qdrift::Array{T}, peak::T, window::T) where T<: e_peak = e_cut, e_ctc = e_ctc, qdrift_peak = qdrift_cut, - h_before = fit(Histogram, e_cut, minimum(e_cut):bin_width:maximum(e_cut)), - h_after = fit(Histogram, e_ctc, minimum(e_ctc):bin_width:maximum(e_ctc)) + h_before = h_before, + h_after = h_after, + fwhm_before = result_before.fwhm, + fwhm_after = result_after.fwhm, + err = (fwhm_before = result_before.err.fwhm, fwhm_after = result_after.err.fwhm), + report_before = report_before, + report_after = report_after ) return result, report end diff --git a/src/cut.jl b/src/cut.jl index 42e404ef..ab426403 100644 --- a/src/cut.jl +++ b/src/cut.jl @@ -1,6 +1,6 @@ """ - cut_single_peak(x::Array, min_x::Float64, max_x::Float64, n_bins::Int=15000, relative_cut::Float64=0.5) + cut_single_peak(x::Array, min_x::Float64, max_x::Float64,; n_bins::Int=15000, relative_cut::Float64=0.5) Cut out a single peak from the array `x` between `min_x` and `max_x`. The number of bins is the number of bins to use for the histogram. @@ -10,7 +10,7 @@ The relative cut is the fraction of the maximum counts to use for the cut. 
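+# Example
+A short sketch (hypothetical baseline-noise vector `enc`; keeps the region above half the peak maximum):
+```julia
+cuts = cut_single_peak(enc, -5.0, 5.0; n_bins=500, relative_cut=0.5)
+enc_peak = enc[cuts.low .< enc .< cuts.high]
+```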
* `low`: lower edge of the cut peak * `high`: upper edge of the cut peak """ -function cut_single_peak(x::Vector{T}, min_x::T, max_x::T, n_bins::Int=1000, relative_cut::Float64=0.5) where T<:Unitful.RealOrRealQuantity +function cut_single_peak(x::Vector{T}, min_x::T, max_x::T,; n_bins::Int=1000, relative_cut::Float64=0.5) where T<:Unitful.RealOrRealQuantity @assert unit(min_x) == unit(max_x) == unit(x[1]) "Units of min_x, max_x and x must be the same" x_unit = unit(x[1]) x, min_x, max_x = ustrip.(x), ustrip(min_x), ustrip(max_x) @@ -18,7 +18,12 @@ function cut_single_peak(x::Vector{T}, min_x::T, max_x::T, n_bins::Int=1000, rel # cut out window of interest x = x[(x .> min_x) .&& (x .< max_x)] # fit histogram - h = fit(Histogram, x, nbins=n_bins) + if n_bins < 0 + bin_width = get_friedman_diaconis_bin_width(x) + h = fit(Histogram, x, minimum(x):bin_width:maximum(x)) + else + h = fit(Histogram, x, nbins=n_bins) + end # find peak cts_argmax = mapslices(argmax, h.weights, dims=1)[1] cts_max = h.weights[cts_argmax] @@ -32,3 +37,73 @@ function cut_single_peak(x::Vector{T}, min_x::T, max_x::T, n_bins::Int=1000, rel end export cut_single_peak + +""" + get_centered_gaussian_window_cut(x::Array, min_x::Float64, max_x::Float64, n_σ::Real, center::Float64=0.0, n_bins_cut::Int=500, relative_cut::Float64=0.2, left::Bool=false) + +Cut out a single peak from the array `x` between `min_x` and `max_x` by fitting a truncated one-sided Gaussian and extrapolating a window cut with `n_σ` standard deviations. +The `center` and side of the fit can be specified with `left` and `center` variable. +# Returns + * `low_cut`: lower edge of the cut peak + * `high_cut`: upper edge of the cut peak + * `center`: center of the peak + * `σ`: standard deviation of the Gaussian + * `low_cut_fit`: lower edge of the cut peak from the fit + * `high_cut_fit`: upper edge of the cut peak from the fit + * `err`: error of the fit parameters +""" +function get_centered_gaussian_window_cut(x::Vector{T}, min_x::T, max_x::T, n_σ::Real,; center::T=zero(x[1]), n_bins_cut::Int=500, relative_cut::Float64=0.2, left::Bool=false, fixed_center::Bool=true) where T<:Unitful.RealOrRealQuantity + @assert unit(min_x) == unit(max_x) == unit(x[1]) "Units of min_x, max_x and x must be the same" + # prepare data + x_unit = unit(x[1]) + x, min_x, max_x, center = ustrip.(x), ustrip(min_x), ustrip(max_x), ustrip(center) + + # get cut window around peak + cuts = cut_single_peak(x, min_x, max_x,; n_bins=n_bins_cut, relative_cut=relative_cut) + + # fit half centered gaussian to define sigma width + if !fixed_center + result_fit, report_fit = fit_half_trunc_gauss(x, cuts,; left=left) + else + result_fit, report_fit = fit_half_centered_trunc_gauss(x, center, cuts,; left=left) + end + + # get bin width + bin_width = get_friedman_diaconis_bin_width(x[x .> result_fit.μ - 0.5*result_fit.σ .&& x .< result_fit.μ + 0.5*result_fit.σ]) + # prepare histogram + h = fit(Histogram, x, result_fit.μ-5*result_fit.σ:bin_width:result_fit.μ+5*result_fit.σ) + # norm fitted distribution for better plotting + # n_fit = length(x[ifelse(left, cuts.low, result_fit.μ) .< x .< ifelse(left, result_fit.μ, cuts.high)]) + # n_fit = length(x) + # x_fit = ifelse(left, cuts.low:(result_fit.μ-cuts.low)/1000:result_fit.μ, result_fit.μ:(cuts.high-result_fit.μ)/1000:cuts.high) + # pdf_norm = n_fit / sum(report_fit.f_fit.(x_fit)) + + result = ( + low_cut = (result_fit.μ - n_σ*result_fit.σ)*x_unit, + high_cut = (result_fit.μ + n_σ*result_fit.σ)*x_unit, + center = result_fit.μ*x_unit, + σ = 
result_fit.σ*x_unit, + low_cut_fit = ifelse(left, cuts.low, result_fit.μ), + high_cut_fit = ifelse(left, result_fit.μ, cuts.high), + max_cut_fit = cuts.max, + err = ( + low_cut = n_σ*result_fit.σ_err*x_unit, + high_cut = n_σ*result_fit.σ_err*x_unit, + center = result_fit.μ_err*x_unit, + σ = result_fit.σ_err*x_unit + ) + ) + report = ( + h = LinearAlgebra.normalize(h, mode=:pdf), + f_fit = t -> report_fit.f_fit(t), + x_fit = ifelse(left, cuts.low:(result_fit.μ-cuts.low)/1000:result_fit.μ, result_fit.μ:(cuts.high-result_fit.μ)/1000:cuts.high), + low_cut = result.low_cut, + high_cut = result.high_cut, + low_cut_fit = result.low_cut_fit, + high_cut_fit = result.high_cut_fit, + center = result.center, + σ = result.σ, + ) + return result, report +end +export get_centered_gaussian_window_cut diff --git a/src/filter_optimization.jl b/src/filter_optimization.jl new file mode 100644 index 00000000..794a2035 --- /dev/null +++ b/src/filter_optimization.jl @@ -0,0 +1,240 @@ +""" + fit_enc_sigmas(enc_grid::Matrix{T}, enc_grid_rt::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}, min_enc::T, max_enc::T, nbins::Int64, rel_cut_fit::T) where T<:Real + +Fit the ENC values in `enc_grid` for each RT in `enc_grid_rt` with a Gaussian and return the optimal RT and the corresponding ENC value. + +# Arguments +- `enc_grid`: 2D array of ENC values for each RT in `enc_grid_rt` +- `enc_grid_rt`: 1D array of RT values for which the ENC values in `enc_grid` are calculated +- `min_enc`: minimum ENC value to consider for the fit +- `max_enc`: maximum ENC value to consider for the fit +- `nbins`: number of bins to use for the histogram of ENC values +- `rel_cut_fit`: relative cut value to use for the fit + +# Returns +- `rt`: optimal RT value +- `min_enc`: corresponding ENC value +""" +function fit_enc_sigmas(enc_grid::Matrix{T}, enc_grid_rt::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}, min_enc::T, max_enc::T, nbins::Int64, rel_cut_fit::T) where T<:Real + @assert size(enc_grid, 1) == length(enc_grid_rt) "enc_grid and enc_grid_rt must have the same number of columns" + + # create empty array for results + enc = zeros(length(enc_grid_rt)) + enc_err = zeros(length(enc_grid_rt)) + + for (r, rt) in enumerate(enc_grid_rt) + # get enc for this rt + enc_rt = flatview(enc_grid)[r, :] + # sanity check + if all(enc_rt .== 0.0) + continue + end + # get cut value + cuts = cut_single_peak(enc_rt, min_enc, max_enc,; n_bins=nbins, relative_cut=rel_cut_fit) + + # fit gaussian + result, _ = fit_single_trunc_gauss(enc_rt, cuts) + + # get sigma + enc[r] = result.σ + enc_err[r] = result.σ_err + end + + # get minimal enc and rt + min_enc = minimum(enc[enc .> 0]) + rt_min_enc = enc_grid_rt[enc .> 0][findmin(enc[enc .> 0])[2]] + + # generate result and report + result = ( + rt = rt_min_enc, + rt_err = step(enc_grid_rt), + min_enc = min_enc + ) + report = ( + rt = result.rt, + min_enc = result.min_enc, + enc_grid_rt = collect(enc_grid_rt), + enc = enc, + enc_err = enc_err + ) + return result, report + +end +export fit_enc_sigmas + +""" +fit_fwhm_ft_fep(e_grid::Matrix, e_grid_ft::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}, rt::Unitful.RealOrRealQuantity, min_e::T, max_e::T, nbins::Int64, rel_cut_fit::T; default_ft::Quantity{T}=3.0u"µs") where {T <:Real} + +Fit the FWHM values in `e_grid` for each FT in `e_grid_ft` with a Gamma Peakshape and return the optimal FT and 
+ +""" +fit_fwhm_ft_fep(e_grid::Matrix, e_grid_ft::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}, rt::Unitful.RealOrRealQuantity, min_e::T, max_e::T, nbins::Int64, rel_cut_fit::T; default_ft::Quantity{T}=3.0u"µs") where {T <:Real} + +Fit the FWHM values in `e_grid` for each FT in `e_grid_ft` with a Gamma Peakshape and return the optimal FT and the corresponding FWHM value. The cut values define, for each flat-top time, a window around the peak for better histogramming. + +# Arguments +- `e_grid`: 2D array of energy values for each FT in `e_grid_ft` +- `e_grid_ft`: 1D array of FT values for which the FWHM values in `e_grid` are calculated +- `rt`: RT value for which the FWHM values in `e_grid` are calculated +- `min_e`: minimum energy value to consider for the fit +- `max_e`: maximum energy value to consider for the fit +- `nbins`: number of bins to use for the histogram of energy values +- `rel_cut_fit`: relative cut value to use for the fit + +# Returns +- `ft`: optimal FT value +- `min_fwhm`: corresponding FWHM value +""" +function fit_fwhm_ft_fep(e_grid::Matrix, e_grid_ft::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}, rt::Unitful.RealOrRealQuantity, min_e::T, max_e::T, nbins::Int64, rel_cut_fit::T; default_ft::Quantity{T}=3.0u"µs") where {T <:Real} + @assert size(e_grid, 1) == length(e_grid_ft) "e_grid and e_grid_ft must have the same number of rows" + + # create empty array for results + fwhm = zeros(length(e_grid_ft)) + fwhm_err = zeros(length(e_grid_ft)) + + for (r, ft) in enumerate(e_grid_ft) + # if ft > rt the filter doesn't make sense, continue + if ft > rt + @debug "FT $ft larger than RT $rt, skipping" + fwhm[r] = NaN + fwhm_err[r] = NaN + continue + end + # get e values for this ft + e_ft = Array{Float64}(flatview(e_grid)[r, :]) + e_ft = e_ft[isfinite.(e_ft)] + + # sanity check + if count(min_e .< e_ft .< max_e) < 100 + @debug "Not enough data points for FT $ft, skipping" + fwhm[r] = NaN + fwhm_err[r] = NaN + continue + end + # cut around peak to increase performance + fit_cut = cut_single_peak(e_ft, min_e, max_e,; n_bins=nbins, relative_cut=rel_cut_fit) + e_ft = e_ft[fit_cut.max - 300 .< e_ft .< fit_cut.max + 300] + + # create histogram from it + bin_width = 2 * (quantile(e_ft, 0.75) - quantile(e_ft, 0.25)) / ∛(length(e_ft)) + h = fit(Histogram, e_ft, minimum(e_ft):bin_width:maximum(e_ft)) + + # create peakstats + ps = estimate_single_peak_stats_th228(h) + # check if ps guess is valid + if any(tuple_to_array(ps) .<= 0) + @debug "Invalid guess for peakstats, skipping" + fwhm[r] = NaN + fwhm_err[r] = NaN + continue + end + # fit peak + result, _ = fit_single_peak_th228(h, ps,; uncertainty=false) + # get fwhm + fwhm[r] = result.fwhm + # fwhm_err[r] = result.fwhm_err + end + + # get minimal fwhm and ft + if isempty(fwhm[fwhm .> 0]) + @warn "No valid FWHM found, setting to NaN" + min_fwhm = NaN + @warn "No valid FT found, setting to default" + ft_min_fwhm = default_ft + else + # calibration constant from last fit to get rough calibration for better plotting + c = 2614.5 ./ result.μ + fwhm = fwhm .* c + # get minimal fwhm and ft + min_fwhm = minimum(fwhm[fwhm .> 0]) + ft_min_fwhm = e_grid_ft[fwhm .> 0][findmin(fwhm[fwhm .> 0])[2]] + end + # generate result and report + result = ( + ft = ft_min_fwhm, + ft_err = step(e_grid_ft), + min_fwhm = min_fwhm + ) + report = ( + ft = result.ft, + min_fwhm = result.min_fwhm, + e_grid_ft = collect(e_grid_ft), + fwhm = fwhm, + # fwhm_err = fwhm_err + ) + return result, report + +end +export fit_fwhm_ft_fep + +
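# Illustrative usage sketch for `fit_fwhm_ft_fep` (values are assumptions for the
# example, not part of this patch): `e_grid` holds one row of uncalibrated FEP
# energies per flat-top time in `e_grid_ft`, at a fixed rise time `rt`. Flat-top
# times above `rt` are skipped by the guard in the loop above.
using Unitful
e_grid_ft = 0.5u"µs":0.25u"µs":4.0u"µs"
e_grid = 2614.5 .+ 2.0 .* randn(length(e_grid_ft), 50_000)   # stand-in energy values
result_ft, report_ft = fit_fwhm_ft_fep(e_grid, e_grid_ft, 2.0u"µs", 2500.0, 2700.0, 100, 0.1)
result_ft.ft, result_ft.min_fwhm   # optimal flat-top time and (roughly calibrated) FWHM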
+function fit_sg_wl(dep_sep_data::NamedTuple{(:dep, :sep)}, a_grid_wl_sg::StepRangeLen, optimization_config::PropDict) + # unpack config + dep, dep_window = optimization_config.sg.dep, Float64.(optimization_config.sg.dep_window) + sep, sep_window = optimization_config.sg.sep, Float64.(optimization_config.sg.sep_window) + + # unpack data + e_dep, e_sep = dep_sep_data.dep.e, dep_sep_data.sep.e + aoe_dep, aoe_sep = dep_sep_data.dep.aoe, dep_sep_data.sep.aoe + + + # prepare peakhist + result_dep, _ = prepare_dep_peakhist(e_dep, dep; n_bins_cut=optimization_config.sg.nbins_dep_cut, relative_cut=optimization_config.sg.dep_rel_cut) + + # get calib constant from fit on DEP peak + e_dep_calib = e_dep .* result_dep.m_calib + e_sep_calib = e_sep .* result_dep.m_calib + + # create empty arrays for sf and sf_err + sep_sfs = ones(length(a_grid_wl_sg)) .* 100 + sep_sfs_err = zeros(length(a_grid_wl_sg)) + + + # for each window length, calculate the survival fraction in the SEP + for (i_aoe, wl) in enumerate(a_grid_wl_sg) + + aoe_dep_i = aoe_dep[i_aoe, :][isfinite.(aoe_dep[i_aoe, :])] ./ result_dep.m_calib + e_dep_i = e_dep_calib[isfinite.(aoe_dep[i_aoe, :])] + + # prepare AoE + max_aoe_dep_i = quantile(aoe_dep_i, optimization_config.sg.max_aoe_quantile) + optimization_config.sg.max_aoe_offset + min_aoe_dep_i = quantile(aoe_dep_i, optimization_config.sg.min_aoe_quantile) + optimization_config.sg.min_aoe_offset + + try + psd_cut = get_psd_cut(aoe_dep_i, e_dep_i; window=dep_window, cut_search_interval=(min_aoe_dep_i, max_aoe_dep_i)) + + aoe_sep_i = aoe_sep[i_aoe, :][isfinite.(aoe_sep[i_aoe, :])] ./ result_dep.m_calib + e_sep_i = e_sep_calib[isfinite.(aoe_sep[i_aoe, :])] + + result_sep, _ = get_peak_surrival_fraction(aoe_sep_i, e_sep_i, sep, sep_window, psd_cut.cut; uncertainty=true, low_e_tail=false) + sep_sfs[i_aoe] = result_sep.sf * 100 + sep_sfs_err[i_aoe] = result_sep.err.sf * 100 + catch e + @warn "Couldn't process window length $wl" + end + end + # get minimal survival fraction and window length + if isempty(sep_sfs[1.0 .< sep_sfs .< 100]) + @warn "No valid SEP SF found, setting to NaN" + min_sf = NaN + min_sf_err = NaN + @warn "No valid window length found, setting to default" + wl_sg_min_sf = 100u"ns" + else + min_sf = minimum(sep_sfs[1.0 .< sep_sfs .< 100]) + min_sf_err = sep_sfs_err[sep_sfs .== min_sf][1] + wl_sg_min_sf = a_grid_wl_sg[1.0 .< sep_sfs .< 100][findmin(sep_sfs[1.0 .< sep_sfs .< 100])[2]] + end + + # generate result and report + result = ( + wl = wl_sg_min_sf, + sf = min_sf, + sf_err = min_sf_err + ) + report = ( + wl = result.wl, + min_sf = result.sf, + min_sf_err = result.sf_err, + a_grid_wl_sg = collect(a_grid_wl_sg), + sfs = sep_sfs, + sfs_err = sep_sfs_err + ) + return result, report +end +export fit_sg_wl \ No newline at end of file
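# Usage sketch for `fit_sg_wl` (illustrative only): `dsp_dep`, `dsp_sep` and
# `optimization_config` are assumed to come from the DSP output and the
# optimization config; `a_grid_wl_sg` must match the window-length grid used to
# produce the A/E rows in the DSP output. `qc_sg_optimization` is defined in
# src/qc.jl later in this patch.
using Unitful
a_grid_wl_sg = 80.0u"ns":40.0u"ns":300.0u"ns"
dep_sep_data = qc_sg_optimization(dsp_dep, dsp_sep, optimization_config)
result_wl, report_wl = fit_sg_wl(dep_sep_data, a_grid_wl_sg, optimization_config)
result_wl.wl   # SG window length minimizing the SEP survival fraction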
diff --git a/src/gof.jl b/src/gof.jl new file mode 100644 index 00000000..63b293b2 --- /dev/null +++ b/src/gof.jl @@ -0,0 +1,176 @@ +# This file is a part of LegendSpecFits.jl, licensed under the MIT License (MIT). + + +""" + _prepare_data(h::Histogram{<:Real,1}) +aux. function to convert histogram data into bin counts, bin widths and bin centers +""" +function _prepare_data(h::Histogram{<:Real,1}) + # get bin counts, widths and centers from histogrammed data + bin_edges = first(h.edges) + counts = h.weights + bin_centers = (bin_edges[begin:end-1] .+ bin_edges[begin+1:end]) ./ 2 + bin_widths = bin_edges[begin+1:end] .- bin_edges[begin:end-1] + return counts, bin_widths, bin_centers +end + + +""" + _get_model_counts(f_fit::Base.Callable,v_ml::NamedTuple,bin_centers::StepRangeLen,bin_widths::StepRangeLen) +aux. function to get the modelled peakshape based on histogram binning and best-fit parameters +""" +function _get_model_counts(f_fit::Base.Callable,v_ml::NamedTuple,bin_centers::StepRangeLen,bin_widths::StepRangeLen) + model_func = Base.Fix2(f_fit, v_ml) # fix the fit parameters to ML best-estimate + model_counts = bin_widths.*map(energy->model_func(energy), bin_centers) # evaluate model at bin centers (= binned measured energies) + return model_counts +end + + + +""" + p_value(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) +calculate the p-value based on least squares; baseline method to assess goodness-of-fit (gof) +# Input: + * `f_fit`: function handle of fit function (peakshape) + * `h`: histogram of data + * `v_ml`: best-fit parameters +# Returns: + * `pval`: p-value of the chi2 test + * `chi2`: chi2 value + * `dof`: degrees of freedom +""" +function p_value(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) + # prepare data + counts, bin_widths, bin_centers = _prepare_data(h) + + # get peakshape of best-fit + model_counts = _get_model_counts(f_fit, v_ml, bin_centers,bin_widths) + + # calculate chi2 + chi2 = sum((model_counts[model_counts.>0]-counts[model_counts.>0]).^2 ./ model_counts[model_counts.>0]) + npar = length(v_ml) + dof = length(counts[model_counts.>0])-npar + pval = ccdf(Chisq(dof),chi2) + if any(model_counts.<=5) + @warn "Bin with <= $(round(minimum(model_counts),digits=0)) counts - chi2 test might not be valid" + else + @debug "p-value = $(round(pval,digits=2))" + end + return pval, chi2, dof +end +export p_value + + +""" + p_value_LogLikeRatio(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) +alternative p-value via loglikelihood ratio +""" +function p_value_LogLikeRatio(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) + # prepare data + counts, bin_widths, bin_centers = _prepare_data(h) + + # get peakshape of best-fit + model_counts = _get_model_counts(f_fit, v_ml, bin_centers,bin_widths) + + # calculate least-squares chi2 (used only for the low-count warning) + chi2 = sum((model_counts[model_counts.>0]-counts[model_counts.>0]).^2 ./ model_counts[model_counts.>0]) + npar = length(v_ml) + dof = length(counts[model_counts.>0])-npar + pval = ccdf(Chisq(dof),chi2) + if any(model_counts.<=5) + @warn "Bin with <= $(minimum(model_counts)) counts - chi2 test might not be valid" + else + @debug "p-value = $(round(pval,digits=2))" + end + # recompute chi2 and p-value from the loglikelihood ratio + chi2 = 2*sum(model_counts.*log.(model_counts./counts)+model_counts-counts) + pval = ccdf(Chisq(dof),chi2) +return pval, chi2, dof +end +export p_value_LogLikeRatio
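# Sketch: standalone goodness-of-fit check for a peak fit. `h` and `ps` are
# assumed to come from a peak histogram and `estimate_single_peak_stats`; the
# same p-value is also attached to the fit result itself further below in this
# patch. `th228_fit_functions` is the (non-exported) fit-function set in specfit.jl.
result, _ = fit_single_peak_th228(h, ps; uncertainty=true)
v_ml = result[(:μ, :σ, :n, :step_amplitude, :skew_fraction, :skew_width, :background)]
pval, chi2, dof = p_value(LegendSpecFits.th228_fit_functions.f_fit, h, v_ml)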
+ +""" + p_value_MC(f_fit::Base.Callable, h::Histogram{<:Real,1},ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background)},v_ml::NamedTuple,;n_samples::Int64=1000) +alternative p-value calculation via Monte Carlo sampling. **Warning**: computationally more expensive than p_value() and p_value_LogLikeRatio() +# Input: + * `f_fit`: function handle of fit function (peakshape) + * `h`: histogram of data + * `ps`: peak statistics (used as start values for the sample fits) + * `v_ml`: best-fit parameters + * `n_samples`: number of samples + +# Performed Steps: +* Create n_samples randomized histograms. For each bin, samples are drawn from a Poisson distribution with λ = model peak shape (best-fit parameters) +* Each sample histogram is fit using the model function `f_fit` +* For each sample fit, the max. loglikelihood is calculated + +# Returns +* p-value: fraction of samples whose max. loglikelihood is at least as large as the max. loglikelihood of the best fit to the data +""" +function p_value_MC(f_fit::Base.Callable, h::Histogram{<:Real,1},ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background)},v_ml::NamedTuple,;n_samples::Int64=1000) + counts, bin_widths, bin_centers = _prepare_data(h) # get data + # get peakshape of best-fit and maximum likelihood value + model_func = Base.Fix2(f_fit, v_ml) # fix the fit parameters to ML best-estimate + model_counts = bin_widths.*map(energy->model_func(energy), bin_centers) # evaluate model at bin centers (= binned measured energies) + loglike_bf = -hist_loglike(model_func,h) + + # draw sample for each bin + dists = Poisson.(model_counts) # create poisson distribution for each bin + counts_mc_vec = rand.(dists,n_samples) # randomized histogram counts + counts_mc = [ [] for _ in 1:n_samples ] # re-structure counts_mc_vec to an array of arrays; there is probably a better way to do this... + for i = 1:n_samples + counts_mc[i] = map(x -> x[i],counts_mc_vec) + end + + # fit every sample histogram and calculate max. loglikelihood + loglike_bf_mc = NaN.*ones(n_samples) + h_mc = deepcopy(h) # copy data histogram; its weights are overwritten for each sample + for i=1:n_samples + h_mc.weights = counts_mc[i] # overwrite counts with MC values + result_fit_mc, _ = fit_single_peak_th228(h_mc, ps ; uncertainty=false) # fit MC histogram + fit_par_mc = result_fit_mc[(:μ, :σ, :n, :step_amplitude, :skew_fraction, :skew_width, :background)] + model_func_sample = Base.Fix2(f_fit, fit_par_mc) # fix the fit parameters to ML best-estimate + loglike_bf_mc[i] = -hist_loglike(model_func_sample,h_mc) # negative loglikelihood of the sample's best fit + end + + # calculate p-value + pval = sum(loglike_bf_mc .<= loglike_bf) ./ n_samples # preliminary; could be improved, e.g. with interpolation + return pval +end +export p_value_MC
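# Sketch of the bin-wise residual diagnostics defined just below, under the same
# assumptions as above (`h` and `v_ml` from a fit_single_peak_th228 fit);
# `get_residuals` is not exported, hence the module qualification.
residuals, residuals_norm, p_value_binwise, bin_centers =
    LegendSpecFits.get_residuals(LegendSpecFits.th228_fit_functions.f_fit, h, v_ml)
any(abs.(residuals_norm) .> 3)   # flag bins deviating by more than ~3σ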
+""" + get_residuals(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) +Calculate bin-wise residuals and normalized residuals. +Calculate a bin-wise p-value based on the Poisson distribution for each bin. + +# Input: + * `f_fit`: function handle of fit function (peakshape) + * `h`: histogram of data + * `v_ml`: best-fit parameters + +# Returns: + * `residuals`: difference: model - data (histogram bin counts) + * `residuals_norm`: normalized residuals: (model - data) / sqrt(model) + * `p_value_binwise`: p-value for each bin based on the Poisson distribution +""" +function get_residuals(f_fit::Base.Callable, h::Histogram{<:Real,1},v_ml::NamedTuple) + # prepare data + counts, bin_widths, bin_centers = _prepare_data(h) + + # get peakshape of best-fit + model_counts = _get_model_counts(f_fit, v_ml, bin_centers,bin_widths) + + # calculate bin-wise residuals + residuals = model_counts[model_counts.>0]-counts[model_counts.>0] + sigma = sqrt.(model_counts[model_counts.>0]) + residuals_norm = residuals./sigma + + # calculate something like a bin-wise p-value (in case that makes sense) + dist = Poisson.(model_counts) # each bin: poisson distributed + cdf_value_low = cdf.(dist, model_counts.-abs.(residuals)) + cdf_value_up = 1 .-cdf.(dist, model_counts.+abs.(residuals)) + p_value_binwise = cdf_value_low .+ cdf_value_up # significance of residuals -> ~probability that the residual (for a given bin) is as large as observed or larger + return residuals, residuals_norm, p_value_binwise, bin_centers +end + diff --git a/src/optimization.jl b/src/optimization.jl deleted file mode 100644 index 6571a392..00000000 --- a/src/optimization.jl +++ /dev/null @@ -1,132 +0,0 @@ -""" - fit_enc_sigmas(enc_grid::Matrix{T}, enc_grid_rt::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}, min_enc::T, max_enc::T, nbins::Int64, rel_cut_fit::T) where T<:Real - -Fit the ENC values in `enc_grid` for each RT in `enc_grid_rt` with a Gaussian and return the optimal RT and the corresponding ENC value.
- -# Arguments -- `enc_grid`: 2D array of ENC values for each RT in `enc_grid_rt` -- `enc_grid_rt`: 1D array of RT values for which the ENC values in `enc_grid` are calculated -- `min_enc`: minimum ENC value to consider for the fit -- `max_enc`: maximum ENC value to consider for the fit -- `nbins`: number of bins to use for the histogram of ENC values -- `rel_cut_fit`: relative cut value to use for the fit - -# Returns -- `rt`: optimal RT value -- `min_enc`: corresponding ENC value -""" -function fit_enc_sigmas(enc_grid::Matrix{T}, enc_grid_rt::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}, min_enc::T, max_enc::T, nbins::Int64, rel_cut_fit::T) where T<:Real - @assert size(enc_grid, 1) == length(enc_grid_rt) "enc_grid and enc_grid_rt must have the same number of columns" - - # create empty array for results - enc = zeros(length(enc_grid_rt)) - enc_err = zeros(length(enc_grid_rt)) - - for (r, rt) in enumerate(enc_grid_rt) - # get enc for this rt - enc_rt = flatview(enc_grid)[r, :] - # get cut value - cuts = cut_single_peak(enc_rt, min_enc, max_enc, nbins, rel_cut_fit) - - # fit gaussian - result, report = fit_single_trunc_gauss(enc_rt, cuts) - - # get sigma - enc[r] = result.σ - enc_err[r] = result.σ_err - end - - # get minimal enc and rt - min_enc = minimum(enc[enc .> 0]) - rt_min_enc = enc_grid_rt[enc .> 0][findmin(enc[enc .> 0])[2]] - - # generate result and report - result = ( - rt = rt_min_enc, - min_enc = min_enc - ) - report = ( - rt = result.rt, - min_enc = result.min_enc, - enc_grid_rt = collect(enc_grid_rt), - enc = enc, - enc_err = enc_err - ) - return result, report - -end -export fit_enc_sigmas - -""" - fit_fwhm_ft_fep(e_grid::Matrix{T}, e_grid_ft::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}) where T <:Real - -Fit the FWHM values in `e_grid` for each FT in `e_grid_ft` with a Gamma Peakshape and return the optimal FT and the corresponding FWHM value. 
- -# Arguments -- `e_grid`: 2D array of energy values for each FT in `e_grid_ft` -- `e_grid_ft`: 1D array of FT values for which the FWHM values in `e_grid` are calculated - -# Returns -- `ft`: optimal FT value -- `min_fwhm`: corresponding FWHM value -""" -function fit_fwhm_ft_fep(e_grid::Matrix, e_grid_ft::StepRangeLen{Quantity{<:T}, Base.TwicePrecision{Quantity{<:T}}, Base.TwicePrecision{Quantity{<:T}}, Int64}) where {T <:Real} - @assert size(e_grid, 1) == length(e_grid_ft) "e_grid and e_grid_rt must have the same number of columns" - - # create empty array for results - fwhm = zeros(length(e_grid_ft)) - fwhm_err = zeros(length(e_grid_ft)) - - for (r, rt) in enumerate(e_grid_ft) - # get e values for this rt - e_ft = Array{Float64}(flatview(e_grid)[r, :]) - e_ft = e_ft[isfinite.(e_ft)] - # create histogram from it - bin_width = 2 * (quantile(e_ft, 0.75) - quantile(e_ft, 0.25)) / ∛(length(e_ft)) - h = fit(Histogram, e_ft, median(e_ft) - 100:bin_width:median(e_ft) + 100) - # create peakstats - ps = estimate_single_peak_stats_th228(h) - # check if ps guess is valid - if any(tuple_to_array(ps) .<= 0) - @debug "Invalid guess for peakstats, skipping" - fwhm[r] = NaN - fwhm_err[r] = NaN - continue - end - # fit peak - result, report = fit_single_peak_th228(h, ps, false) - # get fwhm - fwhm[r] = result.fwhm - # fwhm_err[r] = result.fwhm_err - end - - # calibration constant from last fit to get rough calibration for better plotting - c = 2614.5 ./ result.μ - fwhm = fwhm .* c - - # get minimal fwhm and rt - if isempty(fwhm[fwhm .> 0]) - @warn "No valid FWHM found, setting to NaN" - min_fwhm = NaN - @warn "No valid FT found, setting to maximum" - ft_min_fwhm = e_grid_ft[end] - else - min_fwhm = minimum(fwhm[fwhm .> 0]) - ft_min_fwhm = e_grid_ft[fwhm .> 0][findmin(fwhm[fwhm .> 0])[2]] - end - # generate result and report - result = ( - ft = ft_min_fwhm, - min_fwhm = min_fwhm - ) - report = ( - ft = result.ft, - min_fwhm = result.min_fwhm, - e_grid_ft = collect(e_grid_ft), - fwhm = fwhm, - # fwhm_err = fwhm_err - ) - return result, report - -end -export fit_fwhm_ft_fep diff --git a/src/qc.jl b/src/qc.jl new file mode 100644 index 00000000..61a11f72 --- /dev/null +++ b/src/qc.jl @@ -0,0 +1,69 @@ + + +""" + qc_sg_optimization(dsp_dep, dsp_sep, optimization_config) + +Perform simple QC cuts on the DEP and SEP data and return the data for the optimization of the SG window length. 
+""" +function qc_sg_optimization(dsp_dep::NamedTuple{(:aoe, :e, :blmean, :blslope, :t50)}, dsp_sep::NamedTuple{(:aoe, :e, :blmean, :blslope, :t50)}, optimization_config::PropDict) + ### DEP + # Load DEP data and prepare Pile-up cut + blslope_dep, t50_dep = dsp_dep.blslope[isfinite.(dsp_dep.e)], dsp_dep.t50[isfinite.(dsp_dep.e)] + aoe_dep, e_dep = dsp_dep.aoe[:, isfinite.(dsp_dep.e)], dsp_dep.e[isfinite.(dsp_dep.e)] + # get half truncated centered cut on blslope for pile-up rejection + result_dep_slope_cut, report_dep_slope_cut = get_centered_gaussian_window_cut(blslope_dep, -0.1u"ns^-1", 0.1u"ns^-1", optimization_config.sg.cuts.dep.blslope_sigma, ; n_bins_cut=optimization_config.sg.cuts.dep.nbins_blslope_cut, relative_cut=optimization_config.sg.cuts.dep.rel_cut_blslope_cut) + # Cut on blslope, energy and t0 for simple QC + qc_cut_dep = blslope_dep .> result_dep_slope_cut.low_cut .&& blslope_dep .< result_dep_slope_cut.high_cut .&& e_dep .> optimization_config.sg.cuts.dep.min_e .&& quantile(e_dep, first(optimization_config.sg.cuts.dep.e_quantile)) .< e_dep .< quantile(e_dep, last(optimization_config.sg.cuts.dep.e_quantile)) .&& first(optimization_config.sg.cuts.dep.t50)u"µs" .< t50_dep .< last(optimization_config.sg.cuts.dep.t50)u"µs" + aoe_dep, e_dep = aoe_dep[:, qc_cut_dep], e_dep[qc_cut_dep] + + ### SEP + # Load SEP data and prepare Pile-up cut + blslope_sep, t50_sep = dsp_sep.blslope[isfinite.(dsp_sep.e)], dsp_sep.t50[isfinite.(dsp_sep.e)] + aoe_sep, e_sep = dsp_sep.aoe[:, isfinite.(dsp_sep.e)], dsp_sep.e[isfinite.(dsp_sep.e)] + + # get half truncated centered cut on blslope for pile-up rejection + result_sep_slope_cut, report_sep_slope_cut = get_centered_gaussian_window_cut(blslope_sep, -0.1u"ns^-1", 0.1u"ns^-1", optimization_config.sg.cuts.sep.blslope_sigma, ; n_bins_cut=optimization_config.sg.cuts.sep.nbins_blslope_cut, relative_cut=optimization_config.sg.cuts.sep.rel_cut_blslope_cut) + + # Cut on blslope, energy and t0 for simple QC + qc_cut_sep = blslope_sep .> result_sep_slope_cut.low_cut .&& blslope_sep .< result_sep_slope_cut.high_cut .&& e_sep .> optimization_config.sg.cuts.sep.min_e .&& quantile(e_sep, first(optimization_config.sg.cuts.sep.e_quantile)) .< e_sep .< quantile(e_sep, last(optimization_config.sg.cuts.sep.e_quantile)) .&& first(optimization_config.sg.cuts.sep.t50)u"µs" .< t50_sep .< last(optimization_config.sg.cuts.sep.t50)u"µs" + aoe_sep, e_sep = aoe_sep[:, qc_cut_sep], e_sep[qc_cut_sep] + + return (dep=(aoe=aoe_dep, e=e_dep), sep=(aoe=aoe_sep, e=e_sep)) +end +export qc_sg_optimization + + +""" + qc_cal_energy(data, qc_config) + +Perform simple QC cuts on the data and return the data for energy calibration. 
+""" +function qc_cal_energy(data::Q, qc_config::PropDict) where Q<:Table + # get bl mean cut + result_blmean, _ = get_centered_gaussian_window_cut(data.blmean, qc_config.blmean.min, qc_config.blmean.max, qc_config.blmean.sigma, ; n_bins_cut=convert(Int64, round(length(data) * qc_config.blmean.n_bins_fraction)), relative_cut=qc_config.blmean.relative_cut, fixed_center=false, left=true) + blmean_qc = result_blmean.low_cut .< data.blmean .< result_blmean.high_cut + @debug format("Baseline Mean cut surrival fraction {:.2f}%", count(blmean_qc) / length(data) * 100) + # get bl slope cut + result_blslope, _ = get_centered_gaussian_window_cut(data.blslope, qc_config.blslope.min*u"ns^-1", qc_config.blslope.max*u"ns^-1", qc_config.blslope.sigma, ; n_bins_cut=convert(Int64, round(length(data) * qc_config.blslope.n_bins_fraction)), relative_cut=qc_config.blslope.relative_cut, fixed_center=true, left=false, center=zero(data.blslope[1])) + blslope_qc = result_blslope.low_cut .< data.blslope .< result_blslope.high_cut + @debug format("Baseline Slope cut surrival fraction {:.2f}%", count(blslope_qc) / length(data) * 100) + # get blsigma cut + result_blsigma, _ = get_centered_gaussian_window_cut(data.blsigma, qc_config.blsigma.min, qc_config.blsigma.max, qc_config.blsigma.sigma, ; n_bins_cut=convert(Int64, round(length(data) * qc_config.blsigma.n_bins_fraction)), relative_cut=qc_config.blsigma.relative_cut, fixed_center=false, left=true) + blsigma_qc = result_blsigma.low_cut .< data.blsigma .< result_blsigma.high_cut + @debug format("Baseline Sigma cut surrival fraction {:.2f}%", count(blsigma_qc) / length(data) * 100) + # get t0 cut + t0_qc = qc_config.t0.min*u"µs" .< data.t0 .< qc_config.t0.max*u"µs" + @debug format("t0 cut surrival fraction {:.2f}%", count(t0_qc) / length(data) * 100) + # get intrace pile-up cut + inTrace_qc = .!(data.inTrace_intersect .> data.t0 .+ 2 .* data.drift_time .&& data.inTrace_n .> 1) + @debug format("Intrace pile-up cut surrival fraction {:.2f}%", count(inTrace_qc) / length(data) * 100) + # get energy cut + energy_qc = qc_config.e_trap.min .< data.e_trap .&& isfinite.(data.e_trap) .&& isfinite.(data.e_zac) .&& isfinite.(data.e_cusp) + @debug format("Energy cut surrival fraction {:.2f}%", count(energy_qc) / length(data) * 100) + + # combine all cuts + qc_tab = TypedTables.Table(blmean = blmean_qc, blslope = blslope_qc, blsigma = blsigma_qc, t0 = t0_qc, inTrace = inTrace_qc, energy = energy_qc, qc = blmean_qc .&& blslope_qc .&& blsigma_qc .&& t0_qc .&& inTrace_qc .&& energy_qc) + @debug format("Total QC cut surrival fraction {:.2f}%", count(qc) / length(data) * 100) + return qc_tab +end +export qc_cal_energy \ No newline at end of file diff --git a/src/simple_calibration.jl b/src/simple_calibration.jl index d9d07f21..5f7b1cb9 100644 --- a/src/simple_calibration.jl +++ b/src/simple_calibration.jl @@ -15,7 +15,7 @@ Returns * `peakhists`: array of histograms around the calibration lines * `peakstats`: array of statistics for the calibration line fits """ -function simple_calibration(e_uncal::AbstractArray{<:Real}, th228_lines::Array{T}, window_sizes::Array{T},; n_bins::Int=15000, quantile_perc::Float64=NaN, calib_type::Symbol=:th228) where T<:Real +function simple_calibration(e_uncal::AbstractArray{<:Real}, th228_lines::Array{T}, window_sizes::Array{Tuple{T, T}},; n_bins::Int=15000, quantile_perc::Float64=NaN, calib_type::Symbol=:th228) where T<:Real if calib_type == :th228 return simple_calibration_th228(e_uncal, th228_lines, window_sizes,; n_bins=n_bins, 
diff --git a/src/simple_calibration.jl b/src/simple_calibration.jl index d9d07f21..5f7b1cb9 100644 --- a/src/simple_calibration.jl +++ b/src/simple_calibration.jl @@ -15,7 +15,7 @@ Returns * `peakhists`: array of histograms around the calibration lines * `peakstats`: array of statistics for the calibration line fits """ -function simple_calibration(e_uncal::AbstractArray{<:Real}, th228_lines::Array{T}, window_sizes::Array{T},; n_bins::Int=15000, quantile_perc::Float64=NaN, calib_type::Symbol=:th228) where T<:Real +function simple_calibration(e_uncal::AbstractArray{<:Real}, th228_lines::Array{T}, window_sizes::Array{Tuple{T, T}},; n_bins::Int=15000, quantile_perc::Float64=NaN, calib_type::Symbol=:th228) where T<:Real if calib_type == :th228 return simple_calibration_th228(e_uncal, th228_lines, window_sizes,; n_bins=n_bins, quantile_perc=quantile_perc) else @@ -24,13 +24,12 @@ function simple_calibration(e_uncal::AbstractArray{<:Real}, th228_lines::Array{T end export simple_calibration -function simple_calibration_th228(e_uncal::AbstractArray{<:Real}, th228_lines::Array{T}, window_sizes::Array{T},; n_bins::Int=15000, quantile_perc::Float64=NaN) where T<:Real +function simple_calibration_th228(e_uncal::AbstractArray{<:Real}, th228_lines::Array{T}, window_sizes::Array{Tuple{T, T}},; n_bins::Int=15000, quantile_perc::Float64=NaN) where T<:Real # create initial peak search histogram h_uncal = fit(Histogram, e_uncal, nbins=n_bins) # search all possible peak candidates _, peakpos = RadiationSpectra.peakfinder(h_uncal, σ=5.0, backgroundRemove=true, threshold=10) # the FEP is the last peak in the list - println() if isnan(quantile_perc) fep_guess = sort(peakpos)[end] else @@ -41,11 +40,11 @@ function simple_calibration_th228(e_uncal::AbstractArray{<:Real}, th228_lines::A e_simple = e_uncal .* c bin_window_cut = 2103.5 - 10 .< e_simple .< 2103.5 + 10 # get optimal binning for simple calibration - bin_width = 2 * (quantile(e_simple[bin_window_cut], 0.75) - quantile(e_simple[bin_window_cut], 0.25)) / ∛(length(e_simple[bin_window_cut])) + bin_width = get_friedman_diaconis_bin_width(e_simple[bin_window_cut]) # create histogram for simple calibration h_calsimple = fit(Histogram, e_simple, 0:bin_width:3000) # get histograms around calibration lines and peakstats - peakhists = LegendSpecFits.subhist.(Ref(h_calsimple), [(peak-window, peak+window) for (peak, window) in zip(th228_lines, window_sizes)]) + peakhists = LegendSpecFits.subhist.(Ref(h_calsimple), [(peak-first(window), peak+last(window)) for (peak, window) in zip(th228_lines, window_sizes)]) # peakhists = LegendSpecFits.subhist.([e_simple[peak-window .< e_simple .< peak+window] for (peak, window) in zip(th228_lines, window_sizes)]) peakstats = StructArray(estimate_single_peak_stats.(peakhists)) result = ( diff --git a/src/singlefit.jl b/src/singlefit.jl index 1398628f..9a6bda7b 100644 --- a/src/singlefit.jl +++ b/src/singlefit.jl @@ -145,4 +145,80 @@ function fit_half_centered_trunc_gauss(x::Vector{T}, μ::T, cuts::NamedTuple{(:l ) return (result = result, report = report) end -export fit_half_centered_trunc_gauss \ No newline at end of file +export fit_half_centered_trunc_gauss + + + +""" + fit_half_trunc_gauss(x::Vector, cuts::NamedTuple{(:low, :high, :max)}; left::Bool=false) +Fit a single truncated Gaussian to the data `x` between `cuts.low` and `cuts.high`, using only the half of the peak left (`left=true`) or right (`left=false`) of `cuts.max`.
+# Returns `result` and `report` with: + * `f_fit`: fitted function + * `μ`: mean of the Gaussian + * `μ_err`: error of the mean + * `σ`: standard deviation of the Gaussian + * `σ_err`: error of the standard deviation + * `n`: number of counts in the peak +""" +function fit_half_trunc_gauss(x::Vector{T}, cuts::NamedTuple{(:low, :high, :max), Tuple{T, T, T}}; left::Bool=false) where T<:Unitful.RealOrRealQuantity + @assert unit(cuts.low) == unit(cuts.high) == unit(cuts.max) == unit(x[1]) "Units of cuts.low, cuts.high, cuts.max and x must be the same" + x_unit = unit(x[1]) + x, cut_low, cut_high, cut_max = ustrip.(x), ustrip(cuts.low), ustrip(cuts.high), ustrip(cuts.max) + + # cut peak out of data + x = x[(x .> cut_low) .&& (x .< cut_high)] + # create peak stats for start values + ps = (peak_pos = cut_max, peak_sigma = std(x), peak_counts = length(x)) + @debug "Peak stats: $ps" + # create pseudo priors + pseudo_prior = NamedTupleDist( + μ = Uniform(ps.peak_pos-2*ps.peak_sigma, ps.peak_pos+2*ps.peak_sigma), + σ = weibull_from_mx(ps.peak_sigma, 3*ps.peak_sigma), + n = Uniform(ps.peak_counts-100, ps.peak_counts+100) + ) + # create fit model + f_trafo = BAT.DistributionTransform(Normal, pseudo_prior) + # f_trafo = LegendSpecFitsBATExt.get_distribution_transform(Normal, pseudo_prior) + + v_init = mean(pseudo_prior) + + f_loglike = let cut_low = cut_low, cut_high = cut_high, cut_max = cut_max, left = left, x = x[ifelse(left, x .< cut_max, x .> cut_max)] + v -> (-1) * loglikelihood(truncated(Normal(v[1], v[2]), ifelse(left, cut_low, cut_max), ifelse(left, cut_max, cut_high)), x) + end + + # fit data + opt_r = optimize(f_loglike ∘ inverse(f_trafo), f_trafo(v_init)) + μ, σ = inverse(f_trafo)(opt_r.minimizer) + + # Calculate the Hessian matrix using ForwardDiff + H = ForwardDiff.hessian(f_loglike, [μ, σ]) + + # Calculate the parameter covariance matrix + param_covariance = inv(H) + + # Extract the parameter uncertainties + μ_uncertainty = sqrt(abs(param_covariance[1, 1])) + σ_uncertainty = sqrt(abs(param_covariance[2, 2])) + + @debug "μ: $μ ± $μ_uncertainty" + @debug "σ: $σ ± $σ_uncertainty" + + result = ( + μ = μ * x_unit, + μ_err = μ_uncertainty * x_unit, + σ = σ * x_unit, + σ_err = σ_uncertainty * x_unit, + n = length(x) + ) + report = ( + # f_fit = t -> pdf(truncated(Normal(μ, σ), ifelse(left, cut_low, μ), ifelse(left, μ, cut_high)), t), + f_fit = t -> pdf(Normal(μ, σ), t), + μ = result.μ, + μ_err = result.μ_err, + σ = result.σ, + σ_err = result.σ_err, + n = result.n + ) + return (result = result, report = report) +end +export fit_half_trunc_gauss \ No newline at end of file diff --git a/src/specfit.jl b/src/specfit.jl index 01713bcd..cf28a40a 100644 --- a/src/specfit.jl +++ b/src/specfit.jl @@ -8,7 +8,6 @@ th228_fit_functions = ( f_bck = (x, v) -> background_peakshape(x, v.μ, v.σ, v.step_amplitude, v.background), f_sigWithTail = (x, v) -> signal_peakshape(x, v.μ, v.σ, v.n, v.skew_fraction) + lowEtail_peakshape(x, v.μ, v.σ, v.n, v.skew_fraction, v.skew_width) ) - """ estimate_single_peak_stats(h::Histogram, calib_type::Symbol=:th228) @@ -38,23 +37,39 @@ function estimate_single_peak_stats(h::Histogram,; calib_type::Symbol=:th228) end export estimate_single_peak_stats - function estimate_single_peak_stats_th228(h::Histogram{T}) where T<:Real W = h.weights E = first(h.edges) peak_amplitude, peak_idx = findmax(W) - fwhm_idx_left = findfirst(w -> w >= (first(W) + peak_amplitude) /2, W) - fwhm_idx_right = findlast(w -> w >= (last(W) + peak_amplitude) /2, W) + fwhm_idx_left = findfirst(w -> w >= (first(W) +
peak_amplitude) / 2, W) + fwhm_idx_right = findlast(w -> w >= (last(W) + peak_amplitude) / 2, W) peak_max_pos = (E[peak_idx] + E[peak_idx+1]) / 2 peak_mid_pos = (E[fwhm_idx_right] + E[fwhm_idx_left]) / 2 peak_pos = (peak_max_pos + peak_mid_pos) / 2.0 peak_fwhm = (E[fwhm_idx_right] - E[fwhm_idx_left]) / 1.0 peak_sigma = peak_fwhm * inv(2*√(2log(2))) + peak_fwqm = NaN + # make sure that peakstats have non-zero sigma and fwhm values to prevent fit priors from being zero + if peak_fwhm == 0 + fwqm_idx_left = findfirst(w -> w >= (first(W) + peak_amplitude) / 4, W) + fwqm_idx_right = findlast(w -> w >= (last(W) + peak_amplitude) / 4, W) + peak_fwqm = (E[fwqm_idx_right] - E[fwqm_idx_left]) / 1.0 + peak_sigma = peak_fwqm * inv(2*√(2log(4))) + peak_fwhm = peak_sigma * 2*√(2log(2)) + end + if peak_sigma == 0 + peak_sigma = 1.0 + peak_fwhm = 2.0 + end #peak_area = peak_amplitude * peak_sigma * sqrt(2*π) mean_background = (first(W) + last(W)) / 2 mean_background = ifelse(mean_background == 0, 0.01, mean_background) peak_counts = inv(0.761) * (sum(view(W,fwhm_idx_left:fwhm_idx_right)) - mean_background * peak_fwhm) - + peak_counts = ifelse(peak_counts < 0.0, inv(0.761) * sum(view(W,fwhm_idx_left:fwhm_idx_right)), peak_counts) + if !isnan(peak_fwqm) + peak_counts = inv(0.904) * (sum(view(W,fwqm_idx_left:fwqm_idx_right)) - mean_background * peak_fwqm) + peak_counts = ifelse(peak_counts < 0.0, inv(0.904) * sum(view(W,fwqm_idx_left:fwqm_idx_right)), peak_counts) + end ( peak_pos = peak_pos, peak_fwhm = peak_fwhm, @@ -65,22 +80,22 @@ function estimate_single_peak_stats_th228(h::Histogram{T}) where T<:Real end """ - fitPeaks -Perform a fit of the peakshape to the data in `peakhists` using the initial values in `peakstats` to the calibration lines in `th228_lines`. + fit_peaks(peakhists::Array, peakstats::StructArray, th228_lines::Array,; calib_type::Symbol=:th228, uncertainty::Bool=true, low_e_tail::Bool=true) +Perform a fit of the peakshape to the data in `peakhists` using the initial values in `peakstats` to the calibration lines in `th228_lines`. 
# Returns * `result`: dictionary of the fit results for each calibration line * `report`: dictionary of the fit reports, which can be plotted """ -function fit_peaks(peakhists::Array, peakstats::StructArray, th228_lines::Array,; calib_type::Symbol=:th228) +function fit_peaks(peakhists::Array, peakstats::StructArray, th228_lines::Array,; calib_type::Symbol=:th228, uncertainty::Bool=true, low_e_tail::Bool=true, iterative_fit::Bool=false) if calib_type == :th228 - return fit_peaks_th228(peakhists, peakstats, th228_lines) + return fit_peaks_th228(peakhists, peakstats, th228_lines,; uncertainty=uncertainty, low_e_tail=low_e_tail, iterative_fit=iterative_fit) else error("Calibration type not supported") end end export fit_peaks -function fit_peaks_th228(peakhists::Array, peakstats::StructArray, th228_lines::Array{T}) where T<:Real +function fit_peaks_th228(peakhists::Array, peakstats::StructArray, th228_lines::Array{T},; uncertainty::Bool=true, low_e_tail::Bool=true, iterative_fit::Bool=false) where T<:Any # create return and result dicts result = Dict{T, NamedTuple}() report = Dict{T, NamedTuple}() @@ -90,7 +105,18 @@ h = peakhists[i] ps = peakstats[i] # fit peak - result_peak, report_peak = fit_single_peak_th228(h, ps, ; uncertainty=true) + result_peak, report_peak = fit_single_peak_th228(h, ps, ; uncertainty=uncertainty, low_e_tail=low_e_tail) + + # check covariance matrix for being positive semi-definite (no negative uncertainties) + if uncertainty + if iterative_fit && !isposdef(result_peak.covmat) + @warn "Covariance matrix not positive definite for peak $peak - repeating fit without low-energy tail" + pval_save = result_peak.pval + result_peak, report_peak = fit_single_peak_th228(h, ps, ; uncertainty=uncertainty, low_e_tail=false) + @info "New covariance matrix is positive definite: $(isposdef(result_peak.covmat))" + @info "p-value with low-energy tail: p=$(round(pval_save, digits=5)), without low-energy tail: p=$(round(result_peak.pval, digits=5))" + end + end # save results result[peak] = result_peak report[peak] = report_peak @@ -100,45 +126,45 @@ end """ - fit_single_peak_th228(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background), NTuple{5, T}};, uncertainty::Bool=true, fixed_position::Bool=false) where T<:Real + fit_single_peak_th228(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background), NTuple{5, T}}; uncertainty::Bool=true, fixed_position::Bool=false, low_e_tail::Bool=true) where T<:Real Perform a fit of the peakshape to the data in `h` using the initial values in `ps` while using the `gamma_peakshape` with low-E tail. -Also, FWHM is calculated from the fitted peakshape with MC error propagation. -The peak position can be fixed to the value in `ps` by setting `fixed_position=true`. +Also, FWHM is calculated from the fitted peakshape with MC error propagation. The peak position can be fixed to the value in `ps` by setting `fixed_position=true`. If the low-E tail should not be fitted, it can be disabled by setting `low_e_tail=false`.
# Returns * `result`: NamedTuple of the fit results containing values and errors * `report`: NamedTuple of the fit report which can be plotted """ -function fit_single_peak_th228(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background), NTuple{5, T}}; uncertainty::Bool=true, fixed_position::Bool=false) where T<:Real - # create pseudo priors - pseudo_prior = NamedTupleDist( - μ = Uniform(ps.peak_pos-10, ps.peak_pos+10), +function fit_single_peak_th228(h::Histogram, ps::NamedTuple{(:peak_pos, :peak_fwhm, :peak_sigma, :peak_counts, :mean_background), NTuple{5, T}}; + uncertainty::Bool=true, low_e_tail::Bool=true, fixed_position::Bool=false, pseudo_prior::NamedTupleDist=NamedTupleDist(empty = true), + fit_fun::Symbol=:f_fit) where T<:Real + # create standard pseudo priors + standard_pseudo_prior = NamedTupleDist( + μ = ifelse(fixed_position, ConstValueDist(ps.peak_pos), Uniform(ps.peak_pos-10, ps.peak_pos+10)), σ = weibull_from_mx(ps.peak_sigma, 2*ps.peak_sigma), n = weibull_from_mx(ps.peak_counts, 2*ps.peak_counts), step_amplitude = weibull_from_mx(ps.mean_background, 2*ps.mean_background), - skew_fraction = Uniform(0.005, 0.25), - skew_width = LogUniform(0.001, 0.1), + skew_fraction = ifelse(low_e_tail, Uniform(0.005, 0.25), ConstValueDist(0.0)), + skew_width = ifelse(low_e_tail, LogUniform(0.001, 0.1), ConstValueDist(1.0)), background = weibull_from_mx(ps.mean_background, 2*ps.mean_background), ) - if fixed_position - pseudo_prior = NamedTupleDist( - μ = ConstValueDist(ps.peak_pos), - σ = weibull_from_mx(ps.peak_sigma, 2*ps.peak_sigma), - n = weibull_from_mx(ps.peak_counts, 2*ps.peak_counts), - step_amplitude = weibull_from_mx(ps.mean_background, 2*ps.mean_background), - skew_fraction = Uniform(0.01, 0.25), - skew_width = LogUniform(0.001, 0.1), - background = weibull_from_mx(ps.mean_background, 2*ps.mean_background), - ) + # merge in overwrite priors if given; otherwise use the standard priors + if !(:empty in keys(pseudo_prior)) + # check if the input overwrite prior only uses fields of the standard prior set + @assert all(f -> f in keys(standard_pseudo_prior), keys(pseudo_prior)) "Pseudo priors can only have $(keys(standard_pseudo_prior)) as fields."
+ # replace standard priors with overwrites + pseudo_prior = merge(standard_pseudo_prior, pseudo_prior) + else + # no overwrites given; use the standard priors + pseudo_prior = standard_pseudo_prior + end + # transform back to frequency space f_trafo = BAT.DistributionTransform(Normal, pseudo_prior) # start values for MLE v_init = mean(pseudo_prior) - # create loglikehood function - f_loglike = let f_fit=th228_fit_functions.f_fit, h=h + # create loglikelihood function: f_loglike(v) that can be evaluated for any set of v (fit parameters) + f_loglike = let f_fit=th228_fit_functions[fit_fun], h=h v -> hist_loglike(Base.Fix2(f_fit, v), h) end @@ -148,8 +174,8 @@ # best fit results v_ml = inverse(f_trafo)(Optim.minimizer(opt_r)) - f_loglike_array = let f_fit=gamma_peakshape, h=h - v -> - hist_loglike(x -> f_fit(x, v...), h) + f_loglike_array = let f_fit=th228_fit_functions[fit_fun], h=h, v_keys = keys(standard_pseudo_prior) # same loglikelihood function as f_loglike, but takes an array as input instead of a NamedTuple + v -> - hist_loglike( x -> f_fit(x,NamedTuple{v_keys}(v)), h) end if uncertainty @@ -157,28 +183,36 @@ H = ForwardDiff.hessian(f_loglike_array, tuple_to_array(v_ml)) # Calculate the parameter covariance matrix - param_covariance = inv(H) - + param_covariance_raw = inv(H) + param_covariance = nearestSPD(param_covariance_raw) + # Extract the parameter uncertainties v_ml_err = array_to_tuple(sqrt.(abs.(diag(param_covariance))), v_ml) + # calculate p-value + pval, chi2, dof = p_value(th228_fit_functions.f_fit, h, v_ml) + + # calculate normalized residuals + residuals, residuals_norm, p_value_binwise, bin_centers = get_residuals(th228_fit_functions.f_fit, h, v_ml) + # get fwhm of peak + fwhm, fwhm_err = get_peak_fwhm_th228(v_ml, param_covariance) @debug "Best Fit values" @debug "μ: $(v_ml.μ) ± $(v_ml_err.μ)" @debug "σ: $(v_ml.σ) ± $(v_ml_err.σ)" @debug "n: $(v_ml.n) ± $(v_ml_err.n)" + @debug "p: $pval, chi2 = $(chi2) with $(dof) dof" @debug "FWHM: $(fwhm) ± $(fwhm_err)" - - result = merge(v_ml, (fwhm = fwhm, err = merge(v_ml_err, (fwhm = fwhm_err,)))) + + result = merge(v_ml, (pval = pval, chi2 = chi2, dof = dof, fwhm = fwhm, covmat = param_covariance, covmat_raw = param_covariance_raw, residuals = residuals, residuals_norm = residuals_norm, p_value_binwise = p_value_binwise, bin_centers = bin_centers,), (err = merge(v_ml_err, (fwhm = fwhm_err,)),)) report = ( v = v_ml, h = h, f_fit = x -> Base.Fix2(th228_fit_functions.f_fit, v_ml)(x), f_sig = x -> Base.Fix2(th228_fit_functions.f_sig, v_ml)(x), f_lowEtail = x -> Base.Fix2(th228_fit_functions.f_lowEtail, v_ml)(x), - f_bck = x -> Base.Fix2(th228_fit_functions.f_bck, v_ml)(x) + f_bck = x -> Base.Fix2(th228_fit_functions.f_bck, v_ml)(x), ) else # get fwhm of peak @@ -225,8 +259,6 @@ function estimate_fwhm(v::NamedTuple) return NaN end end - - """ get_peak_fwhm_th228(v_ml::NamedTuple, v_ml_err::NamedTuple) Get the FWHM of a peak from the fit parameters while performing a MC error propagation.
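# Sketch of the new pseudo-prior overwrite mechanism (assuming `h` and `ps` as
# above, and `NamedTupleDist` from ValueShapes, as used throughout the package):
# any subset of the standard prior fields can be replaced, the rest keep their defaults.
using Distributions
my_prior = NamedTupleDist(skew_fraction = Uniform(0.005, 0.1))
result, report = fit_single_peak_th228(h, ps; uncertainty=true, pseudo_prior=my_prior)
result.pval, result.fwhm, result.err.fwhm   # gof and FWHM with covariance-based MC errors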
@@ -235,22 +267,27 @@ Get the FWHM of a peak from the fit parameters while performing a MC error propa * `fwhm`: the FWHM of the peak * `fwhm_err`: the uncertainty of the FWHM of the peak """ -function get_peak_fwhm_th228(v_ml::NamedTuple, v_ml_err::NamedTuple, uncertainty::Bool=true) +function get_peak_fwhm_th228(v_ml::NamedTuple, v_ml_err::Union{Matrix,NamedTuple}, uncertainty::Bool=true) # get fwhm for peak fit fwhm = estimate_fwhm(v_ml) if !uncertainty return fwhm, NaN end + # get MC for FWHM err - v_mc = get_mc_value_shapes(v_ml, v_ml_err, 1000) + if isa(v_ml_err, Matrix) # use correlated fit parameter uncertainties + v_mc = get_mc_value_shapes(v_ml, v_ml_err, 10000) + elseif isa(v_ml_err, NamedTuple) # use uncorrelated fit parameter uncertainties + v_mc = get_mc_value_shapes(v_ml, v_ml_err, 1000) + end fwhm_mc = estimate_fwhm.(v_mc) fwhm_err = std(fwhm_mc[isfinite.(fwhm_mc)]) return fwhm, fwhm_err end - +export get_peak_fwhm_th228 """ - fitCalibration + fitCalibration(peaks::Array, μ::Array) Fit the calibration lines to a linear function. # Returns * `slope`: the slope of the linear fit diff --git a/src/utils.jl b/src/utils.jl index 6bfa8990..22590dfc 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -66,11 +66,82 @@ end """ get_mc_value_shapes(v::NamedTuple, v_err::NamedTuple, n::Int64) - Return a `NamedTuple` with the same fields as `v` and `v_err` but with `Normal` distributions for each field. """ function get_mc_value_shapes(v::NamedTuple, v_err::NamedTuple, n::Int64) vs = BAT.distprod(map(Normal, v, v_err)) NamedTuple.(rand(vs, n)) -end \ No newline at end of file +end + +""" + get_mc_value_shapes(v::NamedTuple, v_err::Matrix, n::Union{Int64,Int32}) +Generate `n` random samples of fit parameters using their respective best-fit values `v` and covariance matrix `v_err` +""" +function get_mc_value_shapes(v::NamedTuple, v_err::Matrix, n::Union{Int64,Int32}) + if !isposdef(v_err) + v_err = nearestSPD(v_err) + @debug "Covariance matrix not positive definite. Using nearestSPD" + end + v_err = v_err[1:6,1:6] # drop the background (7th parameter); keep only the parameters relevant for sampling + v_fitpar = v[keys(v)[1:size(v_err,1)]] # only fit parameters + dist = MvNormal([v_fitpar...], v_err) # multivariate normal distribution using the covariance matrix + v_mc = rand(dist, n) # Draw samples + + # constrain fit parameter samples to physical values; warning: limits are hardcoded, to be improved + Idx_keep = findall((v_mc[3,:].>0) .* # positive amplitude + (v_mc[5,:].<0.25) .* # skew fraction < 0.25 + (v_mc[5,:].>0) .* # skew fraction > 0 + (v_mc[6,:].>0)) # positive skew width + v_mc = v_mc[:,Idx_keep]; + n = size(v_mc,2) + v_mc = [NamedTuple{keys(v)[1:size(v_err,1)]}(v_mc[:,i]) for i=1:n] # convert back to NamedTuples +end +""" + get_friedman_diaconis_bin_width(x::AbstractArray) + +Return the bin width for the given data `x` using the Friedman-Diaconis rule. +""" +function get_friedman_diaconis_bin_width(x::AbstractArray) + 2 * (quantile(x, 0.75) - quantile(x, 0.25)) / ∛(length(x)) +end + + +""" + get_number_of_bins(x::AbstractArray,; method::Symbol=:sqrt) + +Return the number of bins for the given data `x` using the given method. +""" +function get_number_of_bins(x::AbstractArray,; method::Symbol=:sqrt) + # all methods from https://en.wikipedia.org/wiki/Histogram + if method == :sqrt + return round(Int, sqrt(length(x))) + elseif method == :sturges + return round(Int, ceil(log2(length(x)) + 1)) + elseif method == :rice + return round(Int, 2 * ∛(length(x))) + elseif method == :scott + return round(Int, (maximum(x) - minimum(x)) * ∛(length(x)) / (3.5 * std(x))) + elseif method == :doane + return round(Int, 1 + log2(length(x)) + log2(1 + abs(skewness(x)) / sqrt(6 / (length(x) - 2)))) + elseif method == :fd + return round(Int, (maximum(x) - minimum(x)) / get_friedman_diaconis_bin_width(x)) + else + @assert false "Method not implemented" + end +end + +""" + nearestSPD(A::Matrix{<:Real}) +Returns the nearest positive semi-definite matrix to A. +Calculation is based on matrix factorization techniques described in https://www.sciencedirect.com/science/article/pii/0024379588902236 +""" +function nearestSPD(A::Matrix{<:Real}) + B = (A + A') / 2 # make sure matrix is symmetric + _, s, V = svd(B) # singular value decomposition (SVD), s = singular values (~eigenvalues), V = right singular vectors (~eigenvectors) + H = V * diagm(0 => max.(s, 0)) * V' # symmetric polar factor of B + B = (B + H) / 2 # calculate nearest positive semi-definite matrix + B = (B + B') / 2 # make sure matrix is symmetric + return B +end +export nearestSPD \ No newline at end of file
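# Quick sketch of nearestSPD on a symmetric matrix with one slightly negative
# eigenvalue, as can happen for an inverted Hessian (values are illustrative):
using LinearAlgebra
A = [1.0 0.9; 0.9 0.8]           # eigenvalues ≈ 1.806 and -0.006
B = nearestSPD(A)
minimum(eigvals(B)) >= -1e-12    # the negative direction has been lifted to ~0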
diff --git a/test/Project.toml b/test/Project.toml index 52367955..cbd85e99 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,6 +2,8 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" # BAT = "c0cd4b16-88b7-57fa-983b-ab80aecada7e" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +LegendDataTypes = "99e09c13-5545-5ee2-bfa2-77f358fb75d8" LegendHDF5IO = "c9265ca6-b027-5446-b1a4-febfa8dd10b0" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" @@ -10,3 +12,5 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] Documenter = "1" +Interpolations = "0.15" +LegendDataTypes = "0.1" diff --git a/test/test_specfit.jl b/test/test_specfit.jl index ea408c28..9b9f9f2a 100644 --- a/test/test_specfit.jl +++ b/test/test_specfit.jl @@ -1,8 +1,20 @@ # This file is a part of LegendSpecFits.jl, licensed under the MIT License (MIT). - using LegendSpecFits using Test - +using LegendDataTypes: fast_flatten +using Interpolations @testset "specfit" begin + # load data, simple calibration + include("test_utils.jl") + + energy_test, th228_lines = generate_mc_spectrum(200000) + + # simple calibration fit + window_sizes = vcat([(25.0,25.0) for _ in 1:6], (30.0,30.0)) + result_simple, report_simple = simple_calibration(energy_test, th228_lines, window_sizes, n_bins=10000,; calib_type=:th228, quantile_perc=0.995) + + # fit + result, report = fit_peaks(result_simple.peakhists, result_simple.peakstats, th228_lines,; uncertainty=true); end + diff --git a/test/test_utils.jl b/test/test_utils.jl new file mode 100644 index 00000000..7c3f3b15 --- /dev/null +++ b/test/test_utils.jl @@ -0,0 +1,62 @@ +# This file is a part of LegendSpecFits.jl, licensed under the MIT License (MIT). + +""" + generate_mc_spectrum(n_tot::Int=200000,; f_fit::Base.Callable=th228_fit_functions.f_fit) +Sample Legend200 calibration data based on the "Inverse Transform Sampling" method +# Method: +* the pdf of each th228 calibration peak is estimated from the fit model function f_fit from LegendSpecFits +* calculate the cumulative distribution function F(x) +* generate a random number u from a uniform distribution between 0 and 1.
+* find the value x such that F(x) = u by solving for x; done by interpolation of the inverse cdf +* repeat for many u to obtain energy samples +""" +function generate_mc_spectrum(n_tot::Int=200000,; f_fit::Base.Callable=LegendSpecFits.th228_fit_functions.f_fit) + + th228_lines = [583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.51] + v = [ + (μ = 2103.53, σ = 2.11501, n = 385.123, step_amplitude = 1e-242, skew_fraction = 0.005, skew_width = 0.1, background = 55), + (μ = 860.564, σ = 0.817838, n = 355.84, step_amplitude = 1.2, skew_fraction = 0.005, skew_width = 0.099, background = 35), + (μ = 727.33, σ = 0.721594, n = 452.914, step_amplitude = 5.4, skew_fraction = 0.005, skew_width = 0.1, background = 28), + (μ = 1620.5, σ = 1.24034, n = 130.256, step_amplitude = 1e-20, skew_fraction = 0.005, skew_width = 0.1, background = 16), + (μ = 583.191, σ = 0.701544, n = 1865.52, step_amplitude = 17.9, skew_fraction = 0.1, skew_width = 0.1, background = 16), + (μ = 1592.53, σ = 2.09123, n = 206.827, step_amplitude = 1e-21, skew_fraction = 0.005, skew_width = 0.1, background = 17), + (μ = 2614.51, σ = 1.51289, n = 3130.43, step_amplitude = 1e-101, skew_fraction = 0.1, skew_width = 0.003, background = 1) + ] + + # calculate pdf and cdf functions + bin_centers_all = Array{StepRangeLen,1}(undef, length(th228_lines)) + model_counts_all = Array{Array{Float64,1},1}(undef, length(th228_lines)) + model_cdf_all = Array{Array{Float64,1},1}(undef, length(th228_lines)) + energy_mc_all = Array{Array{Float64,1},1}(undef, length(th228_lines)) + PeakMax = zeros(length(th228_lines)) + + for i=1:length(th228_lines) # get finely binned model function to estimate the pdf + n_step = 5000 # fine binning + bin_centers_all[i] = range(v[i].µ-30, stop=v[i].µ+30, length=n_step) + bw = bin_centers_all[i][2]-bin_centers_all[i][1] + bin_widths = range(bw,bw, length=n_step) + + # save as intermediate result + model_counts_all[i] = LegendSpecFits._get_model_counts(f_fit, v[i], bin_centers_all[i], bin_widths) + PeakMax[i] = maximum(model_counts_all[i]) + + # create CDF + model_cdf_all[i] = cumsum(model_counts_all[i]) + model_cdf_all[i] = model_cdf_all[i]./maximum(model_cdf_all[i]) + end + + # weight each peak by its amplitude + PeakMaxRel = PeakMax./sum(PeakMax) + n_i = round.(Int,PeakMaxRel.*n_tot) + + # do the sampling: draw from a uniform distribution + for i=1:length(th228_lines) + bandwidth = maximum(model_cdf_all[i])-minimum(model_cdf_all[i]) + rand_i = minimum(model_cdf_all[i]).+bandwidth.*rand(n_i[i]); # make sure the sample is within the model range + interp_cdf_inv = linear_interpolation(model_cdf_all[i],bin_centers_all[i]) # inverse cdf + energy_mc_all[i] = interp_cdf_inv.(rand_i) + end + + energy_mc = fast_flatten(energy_mc_all) + return energy_mc, th228_lines +end
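# Minimal toy version of the inverse-transform sampling used above (assuming
# Interpolations.jl, as in the test environment): sample from an unnormalized
# Gaussian pdf by interpolating the inverse cdf.
using Interpolations
bin_centers = range(-5.0, 5.0, length=5000)
pdf_vals = exp.(-0.5 .* bin_centers.^2)              # toy pdf, unnormalized
cdf_vals = cumsum(pdf_vals) ./ sum(pdf_vals)         # strictly increasing cdf
interp_cdf_inv = linear_interpolation(cdf_vals, collect(bin_centers))  # x = F⁻¹(u)
u = minimum(cdf_vals) .+ (maximum(cdf_vals) - minimum(cdf_vals)) .* rand(1000)
samples = interp_cdf_inv.(u)                         # ≈ standard normal draws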