function to sample test data from model, run test doesnt need to read…

… external data anymore
legend-exp · Feb 3, 2024 · 4203bde · 4203bde
1 parent f4fef01
commit 4203bde
Show file tree

Hide file tree

Showing 3 changed files with 71 additions and 13 deletions.
diff --git a/Project.toml b/Project.toml
@@ -12,6 +12,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
 IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
 InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
@@ -51,6 +52,7 @@ Distributions = "0.24, 0.25"
 FillArrays = "0.7,0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 1"
 Formatting = "0.4"
 ForwardDiff = "0.10"
+Interpolations =  "v0.15.1"
 IntervalSets = "0.7"
 InverseFunctions = "0.1"
 IrrationalConstants = "0.1, 0.2"

diff --git a/src/specfit_testdata.jl b/src/specfit_testdata.jl
@@ -0,0 +1,61 @@
+# This file is a part of LegendSpecFits.jl, licensed under the MIT License (MIT).
+"""
+Sample Legend200 calibration data based on "Inverse Transform Sampling" method: 
+- pdf of th228 calibration calibration peak is estimated from fit model function f_fit from LegendSpecFits
+- calculate the cumulative distribution function F(x)
+- generate a random number u from a uniform distribution between 0 and 1.
+- find the value x such that  F(x) = u  by solving for  x . --> done by interpolation of the inverse cdf
+- repeat for many u  --> energy samples 
+"""
+function generate_mc_spectrum(n_tot::Int=200000,; f_fit::Base.Callable=th228_fit_functions.f_fit)
+
+    th228_lines =  [583.191,  727.330,  860.564,  1592.53,    1620.50,    2103.53,    2614.51]
+    v = [ 
+            (μ = 2103.53,   σ = 2.11501,    n = 385.123,    step_amplitude = 1e-242,    skew_fraction = 0.005,  skew_width = 0.1,   background = 55),
+            (μ = 860.564,   σ = 0.817838,   n = 355.84,     step_amplitude = 1.2,       skew_fraction = 0.005,  skew_width = 0.099, background = 35),
+            (μ = 727.33,    σ = 0.721594,   n = 452.914,    step_amplitude = 5.4,       skew_fraction = 0.005,  skew_width = 0.1,   background = 28),
+            (μ = 1620.5,    σ = 1.24034,    n = 130.256,    step_amplitude = 1e-20,     skew_fraction = 0.005,  skew_width = 0.1,   background = 16),
+            (μ = 583.191,   σ = 0.701544,   n = 1865.52,    step_amplitude = 17.9,      skew_fraction = 0.1,    skew_width = 0.1,   background = 16),
+            (μ = 1592.53,   σ = 2.09123,    n = 206.827,    step_amplitude = 1e-21,     skew_fraction = 0.005,  skew_width = 0.1,   background = 17),
+            (μ = 2614.51,   σ = 1.51289,    n = 3130.43,    step_amplitude = 1e-101,    skew_fraction = 0.1,    skew_width = 0.003, background = 1)
+    ]
+
+    # calculate pdf and cdf functions 
+    bin_centers_all     =  Array{StepRangeLen,1}(undef, length(th228_lines))
+    model_counts_all    =  Array{Array{Float64,1},1}(undef, length(th228_lines))
+    model_cdf_all       =  Array{Array{Float64,1},1}(undef, length(th228_lines))
+    energy_mc_all       =  Array{Array{Float64,1},1}(undef, length(th228_lines))
+    PeakMax             = zeros(length(th228_lines))
+
+    for i=1:length(th228_lines)  # get fine binned model function to estimate pdf 
+        n_step = 5000 # fine binning 
+        bin_centers_all[i] = range(v[i].µ-30, stop=v[i].µ+30, length=n_step) 
+        bw = bin_centers_all[i][2]-bin_centers_all[i][1]
+        bin_widths = range(bw,bw, length=n_step) 
+
+        # save as intermediate result 
+        model_counts_all[i] = get_model_counts(f_fit, v[i], bin_centers_all[i], bin_widths)
+        plot(bin_centers_all[i],model_counts_all[i],marker=:dot)
+        PeakMax[i] = maximum(model_counts_all[i])
+
+        # create CDF
+        model_cdf_all[i] = cumsum(model_counts_all[i])
+        model_cdf_all[i] = model_cdf_all[i]./maximum(model_cdf_all[i])
+    end
+
+    # weights each peak with amplitude 
+    PeakMaxRel = PeakMax./sum(PeakMax)
+    n_i = round.(Int,PeakMaxRel.*n_tot)
+
+    # do the sampling: drawn from uniform distribution 
+    for i=1:length(th228_lines)
+        bandwidth = maximum(model_cdf_all[i])-minimum(model_cdf_all[i])
+        rand_i = minimum(model_cdf_all[i]).+bandwidth.*rand(n_i[i]); # make sure sample is within model range 
+        interp_cdf_inv = LinearInterpolation(model_cdf_all[i],bin_centers_all[i]) # inverse cdf
+        energy_mc_all[i] = interp_cdf_inv.(rand_i) 
+    end
+
+    energy_mc = fast_flatten(energy_mc_all)
+    return energy_mc, th228_lines
+end
+export generate_mc_spectrum
diff --git a/test/test_specfit.jl b/test/test_specfit.jl
@@ -1,20 +1,15 @@
 # This file is a part of LegendSpecFits.jl, licensed under the MIT License (MIT).
-
 using LegendSpecFits
 using Test
-using LegendHDF5IO, HDF5
-using LegendDataTypes: readdata
 
 @testset "specfit" begin
     # load data, simple calibration 
-    filename = "/remote/ceph/group/legendex/data/l200/julia/current/generated/tier/jlhitch/cal/p03/r000/l200-p03-r000-cal-ch1080005-tier_jlhit.lh5"
-    data = h5open(x -> readdata(x, "ch1080005/dataQC"), filename)
-    th228_lines =  [583.191,  727.330,  860.564,  1592.53,    1620.50,    2103.53,    2614.51]
-    window_sizes =  vcat([(25.0,25.0) for _ in 1:6], (30.0,30.0))
-    result_simple, report_simple = simple_calibration(data.e_cusp, th228_lines, window_sizes, n_bins=10000,; calib_type=:th228, quantile_perc=0.995)
-
-    # fit a th228 peak
-    Idx = 5 
-    result_fit, report_fit = fit_single_peak_th228(result_simple.peakhists[Idx], result_simple.peakstats[Idx] ; uncertainty=true);
-
+    energy_test, th228_lines = generate_mc_spectrum(200000)
+
+    # simple calibration fit 
+     window_sizes =  vcat([(25.0,25.0) for _ in 1:6], (30.0,30.0))
+    result_simple, report_simple = simple_calibration(energy_test, th228_lines, window_sizes, n_bins=10000,; calib_type=:th228, quantile_perc=0.995)
+
+    # fit 
+    result, report = fit_peaks(result_simple.peakhists, result_simple.peakstats, th228_lines,; uncertainty=true);
 end