From e195104612a232e272616af324b98bc2f5021a99 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:25:54 -0600 Subject: [PATCH 01/20] Add tests for i3files to nugen datasets --- src/simweights/_utils.py | 4 +- tests/test_nugen_datasets.py | 101 +++++++++++++++++++++++++---------- 2 files changed, 74 insertions(+), 31 deletions(-) diff --git a/src/simweights/_utils.py b/src/simweights/_utils.py index 1cff4fc..cd45477 100644 --- a/src/simweights/_utils.py +++ b/src/simweights/_utils.py @@ -99,8 +99,8 @@ def constcol(table: Any, colname: str, mask: ArrayLike | None = None) -> float: """ col = get_column(table, colname) if mask is not None: - col = col[np.asarray(mask, dtype=bool)] - val = col[0] + col = col[mask] + val = col.flat[0] assert np.ndim(val) == 0 assert (val == col).all() return float(val) diff --git a/tests/test_nugen_datasets.py b/tests/test_nugen_datasets.py index b1e9a50..93d01c8 100755 --- a/tests/test_nugen_datasets.py +++ b/tests/test_nugen_datasets.py @@ -4,6 +4,7 @@ # # SPDX-License-Identifier: BSD-2-Clause +import contextlib import os import sys from pathlib import Path @@ -17,6 +18,9 @@ from simweights import NuGenWeighter +with contextlib.suppress(ImportError): + from icecube import dataio + datasets = [ "Level2_IC86.2016_NuE.020885.000000", "Level2_IC86.2016_NuMu.020878.000000", @@ -33,65 +37,104 @@ "Level2_IC86.2011_nugen_NuE.010692.000000", "Level2_IC86.2011_nugen_NuMu.010634.000000", ] +loaders = [ + lambda f: h5py.File(f"{f}.hdf5", "r"), + lambda f: uproot.open(f"{f}.root"), + lambda f: tables.open_file(f"{f}.hdf5", "r"), + lambda f: pd.HDFStore(f"{f}.hdf5", "r"), +] + approx = pytest.approx datadir = os.environ.get("SIMWEIGHTS_TESTDATA", None) -@pytest.mark.parametrize("fname", datasets) -@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") -def test_dataset(fname): - filename = Path(datadir) / fname - reffile = h5py.File(str(filename) + 
".hdf5", "r") +def load_reference_values(fname): + d = {} + reffile = h5py.File(fname, "r") wd = reffile["I3MCWeightDict"] - pdgid = wd["PrimaryNeutrinoType"][0] + d["pdgid"] = wd["PrimaryNeutrinoType"][0] - solid_angle = 2 * np.pi * (np.cos(wd["MinZenith"]) - np.cos(wd["MaxZenith"])) + d["solid_angle"] = 2 * np.pi * (np.cos(wd["MinZenith"]) - np.cos(wd["MaxZenith"])) if "SolidAngle" in wd.dtype.names: - np.testing.assert_allclose(solid_angle, wd["SolidAngle"]) + np.testing.assert_allclose(d["solid_angle"], wd["SolidAngle"]) if "InjectionAreaCGS" in wd.dtype.names: - injection_area = wd["InjectionAreaCGS"] + d["injection_area"] = wd["InjectionAreaCGS"] if "InjectionAreaNormCGS" in wd.dtype.names: - injection_area = wd["InjectionAreaNormCGS"] + d["injection_area"] = wd["InjectionAreaNormCGS"] if "TotalWeight" in wd.dtype.names: - total_weight = wd["TotalWeight"] + d["total_weight"] = wd["TotalWeight"] elif "TotalInteractionProbabilityWeight" in wd.dtype.names: - total_weight = wd["TotalInteractionProbabilityWeight"] + d["total_weight"] = wd["TotalInteractionProbabilityWeight"] type_weight = wd["TypeWeight"] if "TypeWeight" in wd.dtype.names else 0.5 - w0 = wd["OneWeight"] / (wd["NEvents"] * type_weight) + d["OneWeight"] = wd["OneWeight"] + d["OneWeightByN"] = wd["OneWeight"] / (wd["NEvents"] * type_weight) - fobjs = [ - reffile, - uproot.open(str(filename) + ".root"), - tables.open_file(str(filename) + ".hdf5", "r"), - pd.HDFStore(str(filename) + ".hdf5", "r"), - ] + return d - for fobj in fobjs: + +@pytest.mark.parametrize("fname", datasets) +@pytest.mark.parametrize("loader", loaders) +@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") +def test_dataset(fname, loader): + filename = Path(datadir) / fname + ref_values = load_reference_values(f"{filename}.hdf5") + + with loader(filename) as fobj: w = NuGenWeighter(fobj, nfiles=1) event_weight = w.get_weight_column("event_weight") - assert event_weight == approx(total_weight) + 
assert event_weight == approx(ref_values["total_weight"]) + cylinder = w.surface.spectra[ref_values["pdgid"]][0].dists[2] + proj_area = cylinder.projected_area(w.get_weight_column("cos_zen")) + assert proj_area == approx(ref_values["injection_area"]) + + sw_etendue = 1 / cylinder.pdf(w.get_weight_column("cos_zen")) + assert sw_etendue == approx(ref_values["solid_angle"] * ref_values["injection_area"], rel=1e-5) + + power_law = w.surface.spectra[ref_values["pdgid"]][0].dists[1] + energy_factor = 1 / power_law.pdf(w.get_weight_column("energy")) + one_weight = ( + w.get_weight_column("event_weight") * energy_factor * ref_values["solid_angle"] * ref_values["injection_area"] + ) + assert one_weight == approx(ref_values["OneWeight"]) + assert w.get_weights(1) == approx(ref_values["OneWeightByN"], rel=1e-5) + + +@pytest.mark.parametrize("fname", datasets) +@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") +@pytest.mark.skipif("dataio" not in globals(), reason="not in icetray environment") +def test_dataset_i3file(fname): + d = load_reference_values(Path(datadir) / (fname + ".hdf5")) + + i = 0 + f = dataio.I3File(str(Path(datadir) / (fname + ".i3.zst"))) + while f.more(): + frame = f.pop_frame() + w = NuGenWeighter(frame, nfiles=1) + event_weight = w.get_weight_column("event_weight") + assert event_weight == approx(d["total_weight"][i]) + + pdgid = int(frame["I3MCWeightDict"]["PrimaryNeutrinoType"]) cylinder = w.surface.spectra[pdgid][0].dists[2] proj_area = cylinder.projected_area(w.get_weight_column("cos_zen")) - assert proj_area == approx(injection_area) + assert proj_area == approx(d["injection_area"][i]) sw_etendue = 1 / cylinder.pdf(w.get_weight_column("cos_zen")) - assert sw_etendue == approx(solid_angle * injection_area, rel=1e-5) + assert sw_etendue == approx(d["solid_angle"][i] * d["injection_area"][i], rel=1e-5) power_law = w.surface.spectra[pdgid][0].dists[1] energy_factor = 1 / 
power_law.pdf(w.get_weight_column("energy")) - one_weight = w.get_weight_column("event_weight") * energy_factor * solid_angle * injection_area - assert one_weight == approx(wd["OneWeight"]) - - assert w0 == approx(w.get_weights(1), rel=1e-5) + one_weight = w.get_weight_column("event_weight") * energy_factor * d["solid_angle"][i] * d["injection_area"][i] - for fobj in fobjs: - fobj.close() + assert one_weight == approx(d["OneWeight"][i]) + assert d["OneWeightByN"][i] == approx(w.get_weights(1), rel=1e-5) + i += 1 + f.close() if __name__ == "__main__": From 5f58d63b2be51e2248568a09a0aee7012750fd04 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:57:22 -0600 Subject: [PATCH 02/20] toward i3file unit tests --- contrib/book_simweights_testdata.py | 166 ++++++++++++++++++++-------- tests/test_corsika_datasets.py | 54 ++++----- tests/test_genie_datasets.py | 117 ++++++++++++++------ tests/test_nugen_datasets.py | 10 +- 4 files changed, 240 insertions(+), 107 deletions(-) diff --git a/contrib/book_simweights_testdata.py b/contrib/book_simweights_testdata.py index da34a8f..49a3f0d 100755 --- a/contrib/book_simweights_testdata.py +++ b/contrib/book_simweights_testdata.py @@ -29,45 +29,123 @@ def fake_event_header(frame: dict) -> None: filelist = { "corsika": [ - "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/12602/0000000-0000999/Level2_IC86.2015_corsika.012602.000000.i3.bz2", - "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/20014/0000000-0000999/Level2_IC86.2015_corsika.020014.000000.i3.bz2", - "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/20021/0000000-0000999/Level2_IC86.2015_corsika.020021.000000.i3.bz2", - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20208/0000000-0000999/Level2_IC86.2016_corsika.020208.000001.i3.bz2", - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20243/0000000-0000999/Level2_IC86.2016_corsika.020243.000001.i3.bz2", - 
"/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20263/0000000-0000999/Level2_IC86.2016_corsika.020263.000000.i3.zst", - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20777/0000000-0000999/Level2_IC86.2016_corsika.020777.000000.i3.zst", - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20778/0000000-0000999/Level2_IC86.2016_corsika.020778.000000.i3.zst", - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20780/0000000-0000999/Level2_IC86.2016_corsika.020780.000000.i3.zst", - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/21889/0000000-0000999/Level2_IC86.2016_corsika.021889.000000.i3.zst", + ( + "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/12602/0000000-0000999/Level2_IC86.2015_corsika.012602.000000.i3.bz2", + False, + ), + ( + "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/20014/0000000-0000999/Level2_IC86.2015_corsika.020014.000000.i3.bz2", + False, + ), + ( + "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/20021/0000000-0000999/Level2_IC86.2015_corsika.020021.000000.i3.bz2", + False, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20208/0000000-0000999/Level2_IC86.2016_corsika.020208.000001.i3.bz2", + False, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20243/0000000-0000999/Level2_IC86.2016_corsika.020243.000001.i3.bz2", + False, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20263/0000000-0000999/Level2_IC86.2016_corsika.020263.000000.i3.zst", + False, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20777/0000000-0000999/Level2_IC86.2016_corsika.020777.000000.i3.zst", + False, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20778/0000000-0000999/Level2_IC86.2016_corsika.020778.000000.i3.zst", + False, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20780/0000000-0000999/Level2_IC86.2016_corsika.020780.000000.i3.zst", + False, + ), + ( + 
"/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/21889/0000000-0000999/Level2_IC86.2016_corsika.021889.000000.i3.zst", + False, + ), ], "nugen": [ - "/data/sim/IceCube/2011/filtered/level2/neutrino-generator/10634/00000-00999/Level2_IC86.2011_nugen_NuMu.010634.000000.i3.bz2", - "/data/sim/IceCube/2011/filtered/level2/neutrino-generator/10692/00000-00999/Level2_IC86.2011_nugen_NuE.010692.000000.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11029/00000-00999/Level2_nugen_numu_IC86.2012.011029.000000.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11065/00000-00999/Level2_IC86.2012_nugen_NuTau.011065.000001.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11069/00000-00999/Level2_nugen_numu_IC86.2012.011069.000000.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11070/00000-00999/Level2_nugen_numu_IC86.2012.011070.000000.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11297/00000-00999/Level2_nugen_nutau_IC86.2012.011297.000000.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11374/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_numu.011374.000050.clsim-base-4.0.3.0.99_eff.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11477/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_nutau.011477.000000.clsim-base-4.0.3.0.99_eff.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11836/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_nutau.011836.000000.clsim-base-4.0.3.0.99_eff.i3.bz2", - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/12646/0000000-0000999/clsim-base-4.0.5.0.99_eff/Level2_IC86.2012_nugen_nue.012646.000000.clsim-base-4.0.5.0.99_eff.i3.bz2", - "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20878/0000000-0000999/Level2_IC86.2016_NuMu.020878.000000.i3.zst", - 
"/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20885/0000000-0000999/Level2_IC86.2016_NuE.020885.000000.i3.zst", - "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20895/0000000-0000999/Level2_IC86.2016_NuTau.020895.000000.i3.zst", + ( + "/data/sim/IceCube/2011/filtered/level2/neutrino-generator/10634/00000-00999/Level2_IC86.2011_nugen_NuMu.010634.000000.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2011/filtered/level2/neutrino-generator/10692/00000-00999/Level2_IC86.2011_nugen_NuE.010692.000000.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11029/00000-00999/Level2_nugen_numu_IC86.2012.011029.000000.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11065/00000-00999/Level2_IC86.2012_nugen_NuTau.011065.000001.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11069/00000-00999/Level2_nugen_numu_IC86.2012.011069.000000.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11070/00000-00999/Level2_nugen_numu_IC86.2012.011070.000000.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11297/00000-00999/Level2_nugen_nutau_IC86.2012.011297.000000.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11374/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_numu.011374.000050.clsim-base-4.0.3.0.99_eff.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11477/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_nutau.011477.000000.clsim-base-4.0.3.0.99_eff.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11836/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_nutau.011836.000000.clsim-base-4.0.3.0.99_eff.i3.bz2", + True, + ), + ( + 
"/data/sim/IceCube/2012/filtered/level2/neutrino-generator/12646/0000000-0000999/clsim-base-4.0.5.0.99_eff/Level2_IC86.2012_nugen_nue.012646.000000.clsim-base-4.0.5.0.99_eff.i3.bz2", + True, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20878/0000000-0000999/Level2_IC86.2016_NuMu.020878.000000.i3.zst", + False, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20885/0000000-0000999/Level2_IC86.2016_NuE.020885.000000.i3.zst", + False, + ), + ( + "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20895/0000000-0000999/Level2_IC86.2016_NuTau.020895.000000.i3.zst", + False, + ), ], "icetop": [ - "/data/ana/CosmicRay/IceTop_level3/sim/IC86.2012/SIBYLL2.1/p/12360_v1s/Level3_IC86.2012_SIBYLL2.1_p_12360_E6.0_0.i3.bz2", + ( + "/data/ana/CosmicRay/IceTop_level3/sim/IC86.2012/SIBYLL2.1/p/12360_v1s/Level3_IC86.2012_SIBYLL2.1_p_12360_E6.0_0.i3.bz2", + True, + ), ], "genie": [ - "/data/sim/IceCubeUpgrade/genie/step3/141828/upgrade_genie_step3_141828_000000.i3.zst", - "/data/sim/IceCube/2023/generated/GENIE/22590/0000000-0000999/GENIE_NuMu_IceCubeUpgrade_v58.22590.000000.i3.zst", - "/data/ana/Software/simweights/test-data/genie_numu_volume_scaling.i3.zst", - "/data/ana/Software/simweights/test-data/genie-icetray.140000A_000000.i3.zst", - "/data/ana/Software/simweights/test-data/genie-icetray.140000B_000000.i3.zst", - "/data/ana/Software/simweights/test-data/genie-icetray.140000C_000000.i3.zst", - "/data/ana/Software/simweights/test-data/genie-icetray.140000D_000000.i3.zst", - "/data/ana/Software/simweights/test-data/level2_genie-icetray.140000_000000.i3.zst", + ("/data/sim/IceCubeUpgrade/genie/step3/141828/upgrade_genie_step3_141828_000000.i3.zst", True), + ( + "/data/sim/IceCube/2023/generated/GENIE/22590/0000000-0000999/GENIE_NuMu_IceCubeUpgrade_v58.22590.000000.i3.zst", + True, + ), + ("/data/ana/Software/simweights/test-data/genie_numu_volume_scaling.i3.zst", True), + 
("/data/ana/Software/simweights/test-data/genie-icetray.140000A_000000.i3.zst", True), + ("/data/ana/Software/simweights/test-data/genie-icetray.140000B_000000.i3.zst", True), + ("/data/ana/Software/simweights/test-data/genie-icetray.140000C_000000.i3.zst", True), + ("/data/ana/Software/simweights/test-data/genie-icetray.140000D_000000.i3.zst", True), + ("/data/ana/Software/simweights/test-data/level2_genie-icetray.140000_000000.i3.zst", True), ], } keys = { @@ -87,12 +165,6 @@ def fake_event_header(frame: dict) -> None: ], "icetop": ["I3TopInjectorInfo", "MCPrimary"], } -streams = { - "corsika": ["InIceSplit"], - "nugen": ["InIceSplit", "in_ice"], - "genie": ["NullSplit"], - "icetop": ["IceTopSplit"], -} if "notemp" in sys.argv: @@ -101,17 +173,14 @@ def fake_event_header(frame: dict) -> None: tempdir = tempfile.TemporaryDirectory(prefix="simweights_testdata_") outdir = Path(tempdir.name) -for simtype, filename in ((i, x) for i in filelist for x in filelist[i]): +for simtype, (filename, split) in ((i, x) for i in filelist for x in filelist[i]): basename = Path(filename).name.replace(".i3.zst", "").replace(".i3.bz2", "").replace(".i3.gz", "") assert basename != Path(filename).name - - split = simtype == "genie" outfile = outdir / basename print(f"Booking : {filename}") print(f" outfile: {outfile}") print(f" keys : {keys[simtype]}") - print(f" streams: {streams[simtype]}") tray = icetray.I3Tray() tray.Add("I3Reader", FileNameList=[filename]) @@ -129,14 +198,19 @@ def fake_event_header(frame: dict) -> None: hdfwriter.I3HDFTableService(str(outfile) + ".hdf5"), rootwriter.I3ROOTTableService(str(outfile) + ".root"), ], - SubEventStreams=streams[simtype], + SubEventStreams=["NullSplit"], keys=keys[simtype], ) - tray.Add("Keep", keys=keys[simtype]) + tray.Add("Keep", keys=["I3EventHeader"] + keys[simtype]) + + def s(frame): + return frame["I3EventHeader"].sub_event_stream == "NullSplit" + + tray.AddModule(s) tray.Add( "I3Writer", Filename=str(outfile) + ".i3.zst", - 
Streams=[icetray.I3Frame.Simulation, icetray.I3Frame.DAQ], + Streams=[icetray.I3Frame.Simulation, icetray.I3Frame.DAQ, icetray.I3Frame.Physics], DropOrphanStreams=[icetray.I3Frame.Physics], ) diff --git a/tests/test_corsika_datasets.py b/tests/test_corsika_datasets.py index 77996ac..feef9e3 100755 --- a/tests/test_corsika_datasets.py +++ b/tests/test_corsika_datasets.py @@ -24,16 +24,26 @@ datadir = Path(datadir) datasets = [ - (False, "Level2_IC86.2015_corsika.012602.000000", 102.01712611701736), - (False, "Level2_IC86.2015_corsika.020014.000000", 23.015500214424705), - (False, "Level2_IC86.2015_corsika.020021.000000", 69.75465614509928), - (False, "Level2_IC86.2016_corsika.020208.000001", 22.622983704306385), - (False, "Level2_IC86.2016_corsika.020243.000001", 4.590586137762489), - (False, "Level2_IC86.2016_corsika.020263.000000", 10.183937153798436), - (False, "Level2_IC86.2016_corsika.020777.000000", 362.94284441826704), - (False, "Level2_IC86.2016_corsika.020778.000000", 6.2654796956603), - (False, "Level2_IC86.2016_corsika.020780.000000", 14.215947086098588), - (True, "Level2_IC86.2016_corsika.021889.000000", 122.83809329321922), + pytest.param(False, "Level2_IC86.2015_corsika.012602.000000", 101.71983520393832, id="12602"), + pytest.param(False, "Level2_IC86.2015_corsika.020014.000000", 22.857025444108743, id="20014"), + pytest.param(False, "Level2_IC86.2015_corsika.020021.000000", 68.93732262681625, id="20021"), + pytest.param(False, "Level2_IC86.2016_corsika.020208.000001", 12.397742530207822, id="20208"), + pytest.param(False, "Level2_IC86.2016_corsika.020243.000001", 3.302275062730073, id="20243"), + pytest.param(False, "Level2_IC86.2016_corsika.020263.000000", 5.3137132171197905, id="20263"), + pytest.param(False, "Level2_IC86.2016_corsika.020777.000000", 359.20422121174204, id="20777"), + pytest.param(False, "Level2_IC86.2016_corsika.020778.000000", 6.25969855736358, id="20778"), + pytest.param(False, "Level2_IC86.2016_corsika.020780.000000", 
13.864780296171585, id="20780"), + pytest.param(True, "Level2_IC86.2016_corsika.021889.000000", 122.56278334422919, id="21889"), +] + +loaders = [ + pytest.param(lambda f: h5py.File(str(f) + ".hdf5", "r"), id="h5py"), + pytest.param(lambda f: uproot.open(str(f) + ".root"), id="uproot"), + pytest.param(lambda f: tables.open_file(str(f) + ".hdf5", "r"), id="pytables"), + pytest.param( + lambda f: pd.HDFStore(str(f) + ".hdf5", "r"), + id="pandas", + ), ] @@ -88,8 +98,9 @@ def triggered_weights(f): @pytest.mark.parametrize(("triggered", "fname", "rate"), datasets) +@pytest.mark.parametrize("loader", loaders) @pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") -def test_dataset(triggered, fname, rate): +def test_dataset(triggered, fname, rate, loader): fname = datadir / fname if triggered: @@ -102,21 +113,12 @@ def test_dataset(triggered, fname, rate): reffile = h5py.File(str(fname) + ".hdf5", "r") w0 = refweight(reffile) - inputfiles = [ - ("h5py", reffile), - ("uproot", uproot.open(str(fname) + ".root")), - ("tables", tables.open_file(str(fname) + ".hdf5", "r")), - ("pandas", pd.HDFStore(str(fname) + ".hdf5", "r")), - ] - - for _, infile in inputfiles: - wobj = CorsikaWeighter(infile, nfiles) - w = wobj.get_weights(flux) - assert w.sum() == pytest.approx(rate) - assert w0 == pytest.approx(w, 1e-6) - - for _, infile in inputfiles: - infile.close() + infile = loader(fname) + wobj = CorsikaWeighter(infile, nfiles) + w = wobj.get_weights(flux) + assert w.sum() == pytest.approx(rate) + assert w0 == pytest.approx(w, 1e-6) + infile.close() if __name__ == "__main__": diff --git a/tests/test_genie_datasets.py b/tests/test_genie_datasets.py index da18144..3da8d1d 100755 --- a/tests/test_genie_datasets.py +++ b/tests/test_genie_datasets.py @@ -4,6 +4,7 @@ # # SPDX-License-Identifier: BSD-2-Clause +import contextlib import os import sys from pathlib import Path @@ -17,22 +18,31 @@ from simweights import GenieWeighter +with 
contextlib.suppress(ImportError): + from icecube import dataio, simclasses + +approx = pytest.approx datadir = os.environ.get("SIMWEIGHTS_TESTDATA", None) datasets = [ "upgrade_genie_step3_141828_000000", "GENIE_NuMu_IceCubeUpgrade_v58.22590.000000", "genie_numu_volume_scaling", ] -approx = pytest.approx +loaders = [ + pytest.param(lambda f: h5py.File(str(f) + ".hdf5", "r"), id="h5py"), + pytest.param(lambda f: uproot.open(str(f) + ".root"), id="uproot"), + pytest.param(lambda f: tables.open_file(str(f) + ".hdf5", "r"), id="pytables"), + pytest.param(lambda f: pd.HDFStore(str(f) + ".hdf5", "r"), id="pandas"), +] -@pytest.mark.parametrize("fname", datasets) -@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") -def test_dataset(fname): - filename = Path(datadir) / fname - reffile = h5py.File(str(filename) + ".hdf5", "r") + +def load_reference_values(fname): + ref = {} + reffile = h5py.File(str(fname) + ".hdf5", "r") info = reffile["I3GenieInfo"][0] wd = reffile["I3MCWeightDict"] + ref["GENIEWeight"] = wd["GENIEWeight"] n_flux_events = info["n_flux_events"] primary_type = info["primary_type"] @@ -42,7 +52,7 @@ def test_dataset(fname): min_energy = info["min_energy"] max_energy = info["max_energy"] power_law_index = info["power_law_index"] - global_probability_scale = info["global_probability_scale"] + ref["global_probability_scale"] = info["global_probability_scale"] muon_volume_scaling = info["muon_volume_scaling"] if "PrimaryNeutrinoType" in wd.dtype.names: @@ -53,50 +63,95 @@ def test_dataset(fname): assert 10 ** wd["MinEnergyLog"] == approx(min_energy) assert 10 ** wd["MaxEnergyLog"] == approx(max_energy) assert wd["PowerLawIndex"] == approx(power_law_index) - assert wd["GlobalProbabilityScale"] == approx(global_probability_scale) + assert wd["GlobalProbabilityScale"] == approx(ref["global_probability_scale"]) if "MuonVolumeScaling" in wd.dtype.names: assert wd["MuonVolumeScaling"] == approx(muon_volume_scaling) - 
solid_angle = 2 * np.pi * (np.cos(min_zenith) - np.cos(max_zenith)) - injection_area = np.pi * (cylinder_radius * 1e2) ** 2 + ref["solid_angle"] = 2 * np.pi * (np.cos(min_zenith) - np.cos(max_zenith)) + ref["injection_area"] = np.pi * (cylinder_radius * 1e2) ** 2 if power_law_index == 1: energy_integral = np.log(max_energy / min_energy) else: energy_integral = (max_energy ** (1 - power_law_index) - min_energy ** (1 - power_law_index)) / (1 - power_law_index) - energy_factor = 1 / (wd["PrimaryNeutrinoEnergy"] ** (-power_law_index) / energy_integral) + ref["energy_factor"] = 1 / (wd["PrimaryNeutrinoEnergy"] ** (-power_law_index) / energy_integral) + + ref["one_weight"] = ( + wd["TotalInteractionProbabilityWeight"] * ref["energy_factor"] * ref["solid_angle"] * ref["injection_area"] + ) + np.testing.assert_allclose(ref["one_weight"], wd["OneWeight"]) + ref["final_weight"] = wd["OneWeight"] / n_flux_events + return ref + + +@pytest.mark.parametrize("fname", datasets) +@pytest.mark.parametrize("loader", loaders) +@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") +def test_dataset(fname, loader): + filename = Path(datadir) / fname + ref = load_reference_values(filename) + + fobj = loader(filename) + w = GenieWeighter(fobj) + + gprob, _, _, edist = next(iter(w.surface.spectra.values()))[0].dists + energy_term = 1 / edist.pdf(w.get_weight_column("energy")) + + assert gprob.v == approx(1 / ref["solid_angle"] / ref["injection_area"] / ref["global_probability_scale"]) + assert w.get_weight_column("wght") == approx(ref["GENIEWeight"]) + assert energy_term == approx(ref["energy_factor"]) + + vol_scale = w.get_weight_column("volscale") + one_weight = ( + w.get_weight_column("wght") + * energy_term + * ref["solid_angle"] + * ref["injection_area"] + * ref["global_probability_scale"] + * vol_scale + ) + assert one_weight == approx(ref["one_weight"], rel=1e-5) - one_weight = wd["TotalInteractionProbabilityWeight"] * energy_factor * 
solid_angle * injection_area - np.testing.assert_allclose(one_weight, wd["OneWeight"]) - final_weight = wd["OneWeight"] / n_flux_events + assert w.get_weights(1) == approx(ref["final_weight"], rel=1e-5) - fobjs = [ - reffile, - uproot.open(str(filename) + ".root"), - tables.open_file(str(filename) + ".hdf5", "r"), - pd.HDFStore(str(filename) + ".hdf5", "r"), - ] + fobj.close() - for fobj in fobjs: - w = GenieWeighter(fobj) + +@pytest.mark.parametrize("fname", datasets) +@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") +@pytest.mark.skipif("dataio" not in globals(), reason="not in icetray environment") +def test_dataset_i3file(fname): + filename = Path(datadir) / fname + ref = load_reference_values(filename) + + i = 0 + f = dataio.I3File(str(Path(datadir) / (fname + ".i3.zst"))) + while f.more(): + frame = f.pop_frame() + if frame.Stop != frame.Physics: + continue + w = GenieWeighter(frame) gprob, _, _, edist = next(iter(w.surface.spectra.values()))[0].dists energy_term = 1 / edist.pdf(w.get_weight_column("energy")) - assert gprob.v == approx(1 / solid_angle / injection_area / global_probability_scale) - assert w.get_weight_column("wght") == approx(wd["GENIEWeight"]) - assert energy_term == approx(energy_factor) + assert gprob.v == approx(1 / ref["solid_angle"] / ref["injection_area"] / ref["global_probability_scale"]) + assert w.get_weight_column("wght") == approx(ref["GENIEWeight"][i]) + assert energy_term == approx(ref["energy_factor"][i]) vol_scale = w.get_weight_column("volscale") one_weight = ( - w.get_weight_column("wght") * energy_term * solid_angle * injection_area * global_probability_scale * vol_scale + w.get_weight_column("wght") + * energy_term + * ref["solid_angle"] + * ref["injection_area"] + * ref["global_probability_scale"] + * vol_scale ) - assert one_weight == approx(wd["OneWeight"], rel=1e-5) - - assert w.get_weights(1) == approx(final_weight, rel=1e-5) + assert one_weight == approx(ref["one_weight"][i], 
rel=1e-5) - for fobj in fobjs: - fobj.close() + assert w.get_weights(1) == approx(ref["final_weight"][i], rel=1e-5) + i += 1 if __name__ == "__main__": diff --git a/tests/test_nugen_datasets.py b/tests/test_nugen_datasets.py index 93d01c8..454914d 100755 --- a/tests/test_nugen_datasets.py +++ b/tests/test_nugen_datasets.py @@ -38,10 +38,10 @@ "Level2_IC86.2011_nugen_NuMu.010634.000000", ] loaders = [ - lambda f: h5py.File(f"{f}.hdf5", "r"), - lambda f: uproot.open(f"{f}.root"), - lambda f: tables.open_file(f"{f}.hdf5", "r"), - lambda f: pd.HDFStore(f"{f}.hdf5", "r"), + pytest.param(lambda f: h5py.File(f"{f}.hdf5", "r"), id="h5py"), + pytest.param(lambda f: uproot.open(f"{f}.root"), id="uproot"), + pytest.param(lambda f: tables.open_file(f"{f}.hdf5", "r"), id="pytables"), + pytest.param(lambda f: pd.HDFStore(f"{f}.hdf5", "r"), id="pandas"), ] approx = pytest.approx @@ -115,6 +115,8 @@ def test_dataset_i3file(fname): f = dataio.I3File(str(Path(datadir) / (fname + ".i3.zst"))) while f.more(): frame = f.pop_frame() + if frame.Stop != frame.Physics: + continue w = NuGenWeighter(frame, nfiles=1) event_weight = w.get_weight_column("event_weight") assert event_weight == approx(d["total_weight"][i]) From 4467b4e96edc77fc1663e72d240b8d3b51c9bdbf Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:12:56 -0600 Subject: [PATCH 03/20] fix all issues with i3file --- .pre-commit-config.yaml | 12 +++--- contrib/book_simweights_testdata.py | 6 +-- pyproject.toml | 3 +- src/simweights/__init__.py | 16 ++++---- src/simweights/_spatial.py | 2 +- src/simweights/_utils.py | 2 +- src/simweights/_weighter.py | 2 +- tests/test_genie_datasets.py | 2 +- tests/test_icetop_datasets.py | 64 ++++++++++++++++------------- 9 files changed, 56 insertions(+), 53 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a6d367..fcec4b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 
+7,7 @@ ci: autoupdate_schedule: quarterly repos: - repo: https://github.com/google/yamlfmt - rev: v0.13.0 + rev: v0.15.0 hooks: - id: yamlfmt - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks @@ -16,7 +16,7 @@ repos: - id: pretty-format-toml args: [--autofix] - repo: https://github.com/fsfe/reuse-tool - rev: v4.0.3 + rev: v5.0.2 hooks: - id: reuse - repo: https://github.com/codespell-project/codespell @@ -24,26 +24,26 @@ repos: hooks: - id: codespell - repo: https://github.com/adamchainz/blacken-docs - rev: "1.19.0" + rev: "1.19.1" hooks: - id: blacken-docs args: [-l 100] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.11.2 + rev: v1.14.1 hooks: - id: mypy files: simweights additional_dependencies: [numpy] exclude: ^contrib/ - repo: https://github.com/pycqa/pylint - rev: v3.3.1 + rev: v3.3.3 hooks: - id: pylint files: simweights exclude: ^contrib/ additional_dependencies: [numpy, pandas] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.9 + rev: v0.9.0 hooks: - id: ruff args: [--fix, --show-fixes] diff --git a/contrib/book_simweights_testdata.py b/contrib/book_simweights_testdata.py index 49a3f0d..4e76e34 100755 --- a/contrib/book_simweights_testdata.py +++ b/contrib/book_simweights_testdata.py @@ -202,11 +202,7 @@ def fake_event_header(frame: dict) -> None: keys=keys[simtype], ) tray.Add("Keep", keys=["I3EventHeader"] + keys[simtype]) - - def s(frame): - return frame["I3EventHeader"].sub_event_stream == "NullSplit" - - tray.AddModule(s) + tray.AddModule(lambda frame: frame["I3EventHeader"].sub_event_stream == "NullSplit") tray.Add( "I3Writer", Filename=str(outfile) + ".i3.zst", diff --git a/pyproject.toml b/pyproject.toml index a6d23e6..5a9e6bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,7 +108,8 @@ select = ["ALL"] "examples/*" = [ "D", # pydocstyle "F401", # unused-import - "T201" # flake8-print + "T201", # flake8-print + "PLC0206" # Extracting value from dictionary without calling `.items()` ] 
"tests/*" = [ "D", # pydocstyle diff --git a/src/simweights/__init__.py b/src/simweights/__init__.py index 63e98b0..d567c85 100644 --- a/src/simweights/__init__.py +++ b/src/simweights/__init__.py @@ -15,17 +15,16 @@ __version__ = "0.1.3" __all__ = [ + "TIG1996", "CircleInjector", "CorsikaWeighter", - "NaturalRateCylinder", - "UniformSolidAngleCylinder", - "TIG1996", "FixedFractionFlux", - "GenieWeighter", "GaisserH3a", "GaisserH4a", "GaisserH4a_IT", "GaisserHillas", + "GenerationSurface", + "GenieWeighter", "GlobalFitGST", "GlobalFitGST_IT", "GlobalSplineFit", @@ -36,13 +35,14 @@ "Hoerandel_IT", "Honda2004", "IceTopWeighter", - "PDGCode", - "corsika_to_pdg", - "generation_surface", - "GenerationSurface", + "NaturalRateCylinder", "NuGenWeighter", + "PDGCode", "PowerLaw", + "UniformSolidAngleCylinder", "Weighter", + "corsika_to_pdg", + "generation_surface", ] from ._corsika_weighter import CorsikaWeighter diff --git a/src/simweights/_spatial.py b/src/simweights/_spatial.py index 6d4b3d3..493186b 100644 --- a/src/simweights/_spatial.py +++ b/src/simweights/_spatial.py @@ -61,7 +61,7 @@ def pdf(self: CylinderBase, cos_zen: ArrayLike) -> NDArray[np.float64]: raise NotImplementedError def __repr__(self: CylinderBase) -> str: - return f"{self.__class__.__name__}" f"({self.length}, {self.radius}, {self.cos_zen_min}, {self.cos_zen_max})" + return f"{self.__class__.__name__}({self.length}, {self.radius}, {self.cos_zen_min}, {self.cos_zen_max})" def __eq__(self: CylinderBase, other: object) -> bool: return ( diff --git a/src/simweights/_utils.py b/src/simweights/_utils.py index cd45477..c52e498 100644 --- a/src/simweights/_utils.py +++ b/src/simweights/_utils.py @@ -91,7 +91,7 @@ def get_column(table: Any, name: str) -> NDArray[np.float64]: return np.asarray(column, dtype=np.float64) -def constcol(table: Any, colname: str, mask: ArrayLike | None = None) -> float: +def constcol(table: Any, colname: str, mask: Any = None) -> float: """Helper function which makes sure that all 
of the entries in a column are exactly the same. This is necessary because CORSIKA and NuGen store generation surface parameters in every frame and we diff --git a/src/simweights/_weighter.py b/src/simweights/_weighter.py index df44892..70fb015 100644 --- a/src/simweights/_weighter.py +++ b/src/simweights/_weighter.py @@ -255,7 +255,7 @@ def tostring(self: Weighter, flux: None | object | Callable[[Any], ArrayLike] | weights = self.get_weights(flux) output += f"Using flux model : {flux.__class__.__name__}\n" output += f"Event Rate : {weights.sum():8.6g} Hz\n" - output += f"Livetime : {weights.sum() / (weights ** 2).sum():8.6g} s\n" + output += f"Livetime : {weights.sum() / (weights**2).sum():8.6g} s\n" return output def __str__(self: Weighter) -> str: diff --git a/tests/test_genie_datasets.py b/tests/test_genie_datasets.py index 3da8d1d..a22407d 100755 --- a/tests/test_genie_datasets.py +++ b/tests/test_genie_datasets.py @@ -19,7 +19,7 @@ from simweights import GenieWeighter with contextlib.suppress(ImportError): - from icecube import dataio, simclasses + from icecube import dataio, simclasses # noqa: F401 approx = pytest.approx datadir = os.environ.get("SIMWEIGHTS_TESTDATA", None) diff --git a/tests/test_icetop_datasets.py b/tests/test_icetop_datasets.py index aabfaaf..dbc1fd1 100755 --- a/tests/test_icetop_datasets.py +++ b/tests/test_icetop_datasets.py @@ -21,41 +21,47 @@ datasets = ["Level3_IC86.2012_SIBYLL2.1_p_12360_E6.0_0"] approx = pytest.approx +loaders = [ + pytest.param(lambda f: h5py.File(str(f) + ".hdf5", "r"), id="h5py"), + pytest.param(lambda f: uproot.open(str(f) + ".root"), id="uproot"), + pytest.param(lambda f: tables.open_file(str(f) + ".hdf5", "r"), id="pytables"), + pytest.param(lambda f: pd.HDFStore(str(f) + ".hdf5", "r"), id="pandas"), +] -@pytest.mark.parametrize("fname", datasets) -@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") -def test_dataset(fname): - filename = Path(datadir) / fname + +def 
load_reference_values(filename): reffile = h5py.File(str(filename) + ".hdf5", "r") + ref = {} assert len(reffile["I3TopInjectorInfo"]) == 1 si = reffile["I3TopInjectorInfo"][0] pri = reffile["MCPrimary"] - solid_angle = np.pi * (np.cos(si["min_zenith"]) ** 2 - np.cos(si["max_zenith"]) ** 2) - injection_area = np.pi * (si["sampling_radius"] * 1e2) ** 2 - energy_integral = np.log(si["max_energy"] / si["min_energy"]) # assuming E^-1 - energy_factor = energy_integral * pri["energy"] - final_weight = energy_factor * solid_angle * injection_area / si["n_events"] - - fobjs = [ - reffile, - uproot.open(str(filename) + ".root"), - tables.open_file(str(filename) + ".hdf5", "r"), - pd.HDFStore(str(filename) + ".hdf5", "r"), - ] - - for fobj in fobjs: - w = IceTopWeighter(fobj) - spatial = w.surface.spectra[2212][0].dists[1] - proj_area = spatial.projected_area(1) - assert proj_area == approx(injection_area) - sw_etendue = 1 / spatial.pdf(1) - assert sw_etendue == approx(solid_angle * injection_area, 1e-5) - assert energy_factor == approx(energy_integral * w.get_weight_column("energy")) - assert final_weight == approx(w.get_weights(1), 1e-5) - - for fobj in fobjs: - fobj.close() + ref["solid_angle"] = np.pi * (np.cos(si["min_zenith"]) ** 2 - np.cos(si["max_zenith"]) ** 2) + ref["injection_area"] = np.pi * (si["sampling_radius"] * 1e2) ** 2 + ref["energy_integral"] = np.log(si["max_energy"] / si["min_energy"]) # assuming E^-1 + ref["energy_factor"] = ref["energy_integral"] * pri["energy"] + ref["final_weight"] = ref["energy_factor"] * ref["solid_angle"] * ref["injection_area"] / si["n_events"] + return ref + + +@pytest.mark.parametrize("fname", datasets) +@pytest.mark.parametrize("loader", loaders) +@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") +def test_dataset(fname, loader): + filename = Path(datadir) / fname + + ref = load_reference_values(filename) + fobj = loader(filename) + w = IceTopWeighter(fobj) + spatial = 
w.surface.spectra[2212][0].dists[1] + proj_area = spatial.projected_area(1) + assert proj_area == approx(ref["injection_area"]) + sw_etendue = 1 / spatial.pdf(1) + assert sw_etendue == approx(ref["solid_angle"] * ref["injection_area"], 1e-5) + assert ref["energy_factor"] == approx(ref["energy_integral"] * w.get_weight_column("energy")) + assert ref["final_weight"] == approx(w.get_weights(1), 1e-5) + + fobj.close() if __name__ == "__main__": From bc118d2a1bfbd4aba039f097ff44e4edcc70fc85 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:22:19 -0600 Subject: [PATCH 04/20] enable nuflux for older version of python --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 967d5cc..7d0483e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,8 +32,8 @@ jobs: cache-dependency-path: pyproject.toml - name: Install SimWeights run: | - if [ "${{matrix.python-version}}" == "3.9" ]; then - python3 -m pip install "numpy<2" + if [ "${{matrix.python-version}}" != "3.13" ]; then + python3 -m pip install nuflux fi python3 -m pip install flit python3 -m flit install --symlink --deps=production --extras=test From 789a37b96c60a35f9f83ecbece5874d27e2289bf Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Fri, 10 Jan 2025 14:28:38 -0600 Subject: [PATCH 05/20] test i3file interface in docker --- .github/workflows/tests.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7d0483e..18e09d3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -60,6 +60,32 @@ jobs: with: fail_ci_if_error: false verbose: true + TestsIceTray: + runs-on: ubuntu-latest + container: icecube/icetray:icetray-devel-current-ubuntu22.04-X64 + strategy: + 
fail-fast: false + steps: + - uses: actions/checkout@v4 + - name: Install SimWeights + run: python3 -m pip install .[test] + - name: Download Test Data + run: | + curl -u icecube:${{ secrets.ICECUBE_PASSWORD }} https://convey.icecube.wisc.edu/data/ana/Software/simweights/test-data/simweights_testdata.tar.gz -O + tar xzvf simweights_testdata.tar.gz + - name: Run Unit Tests + env: + SIMWEIGHTS_TESTDATA: . + run: | + echo i3src=$I3_SRC i3build$I3_BUILD + python -m pytest --junit-xml=test-results-icetray.junit.xml + - name: Upload Test Results + uses: actions/upload-artifact@v4 + if: always() + with: + if-no-files-found: error + name: test-results-icetray.junit.xml + path: test-results-icetray.junit.xml publish-test-results: name: "Publish Tests Results" needs: Tests From 8ce53561d0f599c76795e9e41284016887878565 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Fri, 10 Jan 2025 14:58:15 -0600 Subject: [PATCH 06/20] manually load entry point for docker --- .github/workflows/tests.yml | 4 +--- src/simweights/_utils.py | 6 +++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 18e09d3..fefe8dc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -76,9 +76,7 @@ jobs: - name: Run Unit Tests env: SIMWEIGHTS_TESTDATA: . 
- run: | - echo i3src=$I3_SRC i3build$I3_BUILD - python -m pytest --junit-xml=test-results-icetray.junit.xml + run: /opt/icetray/bin/icetray-shell python -m pytest --junit-xml=test-results-icetray.junit.xml - name: Upload Test Results uses: actions/upload-artifact@v4 if: always() diff --git a/src/simweights/_utils.py b/src/simweights/_utils.py index c52e498..52d4709 100644 --- a/src/simweights/_utils.py +++ b/src/simweights/_utils.py @@ -74,6 +74,8 @@ def has_column(table: Any, name: str) -> bool: """Helper function for determining if a table has a column, works with h5py, pytables, and pandas.""" if hasattr(table, "cols"): return hasattr(table.cols, name) + if hasattr(table, name): + return True try: table[name] # pylint: disable=pointless-statement return True # noqa: TRY300 @@ -81,10 +83,12 @@ def has_column(table: Any, name: str) -> bool: return False -def get_column(table: Any, name: str) -> NDArray[np.float64]: +def get_column(table: Any, name: str) -> Any: """Helper function getting a column from a table, works with h5py, pytables, and pandas.""" if hasattr(table, "cols"): return np.asarray(getattr(table.cols, name)[:], dtype=np.float64) + if hasattr(table, name): + return np.atleast_1d(getattr(table, name)) column = table[name] if hasattr(column, "array") and callable(column.array): return np.asarray(column.array(library="np"), dtype=np.float64) From 46e0e40da99b109be60bf5809b8037b138437f52 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:01:25 -0600 Subject: [PATCH 07/20] use non old pytest --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fefe8dc..c040369 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -68,7 +68,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install SimWeights - run: python3 -m pip install .[test] + run: python3 -m pip 
install 'pytest>7' .[test] - name: Download Test Data run: | curl -u icecube:${{ secrets.ICECUBE_PASSWORD }} https://convey.icecube.wisc.edu/data/ana/Software/simweights/test-data/simweights_testdata.tar.gz -O From 65768cad4ebc45e796a4fef8bb0b5a95ccff8933 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:12:34 -0600 Subject: [PATCH 08/20] fix booking script for test data use a single subeventstream instead of conditionally using null don't use drop orphan streams which was causing problems --- contrib/book_simweights_testdata.py | 167 +++++++--------------------- 1 file changed, 42 insertions(+), 125 deletions(-) diff --git a/contrib/book_simweights_testdata.py b/contrib/book_simweights_testdata.py index 4e76e34..f649a4c 100755 --- a/contrib/book_simweights_testdata.py +++ b/contrib/book_simweights_testdata.py @@ -29,123 +29,45 @@ def fake_event_header(frame: dict) -> None: filelist = { "corsika": [ - ( - "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/12602/0000000-0000999/Level2_IC86.2015_corsika.012602.000000.i3.bz2", - False, - ), - ( - "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/20014/0000000-0000999/Level2_IC86.2015_corsika.020014.000000.i3.bz2", - False, - ), - ( - "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/20021/0000000-0000999/Level2_IC86.2015_corsika.020021.000000.i3.bz2", - False, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20208/0000000-0000999/Level2_IC86.2016_corsika.020208.000001.i3.bz2", - False, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20243/0000000-0000999/Level2_IC86.2016_corsika.020243.000001.i3.bz2", - False, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20263/0000000-0000999/Level2_IC86.2016_corsika.020263.000000.i3.zst", - False, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20777/0000000-0000999/Level2_IC86.2016_corsika.020777.000000.i3.zst", - False, - ), - ( 
- "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20778/0000000-0000999/Level2_IC86.2016_corsika.020778.000000.i3.zst", - False, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20780/0000000-0000999/Level2_IC86.2016_corsika.020780.000000.i3.zst", - False, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/21889/0000000-0000999/Level2_IC86.2016_corsika.021889.000000.i3.zst", - False, - ), + "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/12602/0000000-0000999/Level2_IC86.2015_corsika.012602.000000.i3.bz2", + "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/20014/0000000-0000999/Level2_IC86.2015_corsika.020014.000000.i3.bz2", + "/data/sim/IceCube/2015/filtered/level2/CORSIKA-in-ice/20021/0000000-0000999/Level2_IC86.2015_corsika.020021.000000.i3.bz2", + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20208/0000000-0000999/Level2_IC86.2016_corsika.020208.000001.i3.bz2", + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20243/0000000-0000999/Level2_IC86.2016_corsika.020243.000001.i3.bz2", + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20263/0000000-0000999/Level2_IC86.2016_corsika.020263.000000.i3.zst", + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20777/0000000-0000999/Level2_IC86.2016_corsika.020777.000000.i3.zst", + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20778/0000000-0000999/Level2_IC86.2016_corsika.020778.000000.i3.zst", + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/20780/0000000-0000999/Level2_IC86.2016_corsika.020780.000000.i3.zst", + "/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/21889/0000000-0000999/Level2_IC86.2016_corsika.021889.000000.i3.zst", ], "nugen": [ - ( - "/data/sim/IceCube/2011/filtered/level2/neutrino-generator/10634/00000-00999/Level2_IC86.2011_nugen_NuMu.010634.000000.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2011/filtered/level2/neutrino-generator/10692/00000-00999/Level2_IC86.2011_nugen_NuE.010692.000000.i3.bz2", - 
True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11029/00000-00999/Level2_nugen_numu_IC86.2012.011029.000000.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11065/00000-00999/Level2_IC86.2012_nugen_NuTau.011065.000001.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11069/00000-00999/Level2_nugen_numu_IC86.2012.011069.000000.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11070/00000-00999/Level2_nugen_numu_IC86.2012.011070.000000.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11297/00000-00999/Level2_nugen_nutau_IC86.2012.011297.000000.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11374/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_numu.011374.000050.clsim-base-4.0.3.0.99_eff.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11477/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_nutau.011477.000000.clsim-base-4.0.3.0.99_eff.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11836/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_nutau.011836.000000.clsim-base-4.0.3.0.99_eff.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/12646/0000000-0000999/clsim-base-4.0.5.0.99_eff/Level2_IC86.2012_nugen_nue.012646.000000.clsim-base-4.0.5.0.99_eff.i3.bz2", - True, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20878/0000000-0000999/Level2_IC86.2016_NuMu.020878.000000.i3.zst", - False, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20885/0000000-0000999/Level2_IC86.2016_NuE.020885.000000.i3.zst", - False, - ), - ( - "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20895/0000000-0000999/Level2_IC86.2016_NuTau.020895.000000.i3.zst", - False, - ), + 
"/data/sim/IceCube/2011/filtered/level2/neutrino-generator/10634/00000-00999/Level2_IC86.2011_nugen_NuMu.010634.000000.i3.bz2", + "/data/sim/IceCube/2011/filtered/level2/neutrino-generator/10692/00000-00999/Level2_IC86.2011_nugen_NuE.010692.000000.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11029/00000-00999/Level2_nugen_numu_IC86.2012.011029.000000.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11065/00000-00999/Level2_IC86.2012_nugen_NuTau.011065.000001.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11069/00000-00999/Level2_nugen_numu_IC86.2012.011069.000000.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11070/00000-00999/Level2_nugen_numu_IC86.2012.011070.000000.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11297/00000-00999/Level2_nugen_nutau_IC86.2012.011297.000000.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11374/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_numu.011374.000050.clsim-base-4.0.3.0.99_eff.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11477/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_nutau.011477.000000.clsim-base-4.0.3.0.99_eff.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/11836/00000-00999/clsim-base-4.0.3.0.99_eff/Level2_IC86.2012_nugen_nutau.011836.000000.clsim-base-4.0.3.0.99_eff.i3.bz2", + "/data/sim/IceCube/2012/filtered/level2/neutrino-generator/12646/0000000-0000999/clsim-base-4.0.5.0.99_eff/Level2_IC86.2012_nugen_nue.012646.000000.clsim-base-4.0.5.0.99_eff.i3.bz2", + "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20878/0000000-0000999/Level2_IC86.2016_NuMu.020878.000000.i3.zst", + "/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20885/0000000-0000999/Level2_IC86.2016_NuE.020885.000000.i3.zst", + 
"/data/sim/IceCube/2016/filtered/level2/neutrino-generator/20895/0000000-0000999/Level2_IC86.2016_NuTau.020895.000000.i3.zst", ], "icetop": [ - ( - "/data/ana/CosmicRay/IceTop_level3/sim/IC86.2012/SIBYLL2.1/p/12360_v1s/Level3_IC86.2012_SIBYLL2.1_p_12360_E6.0_0.i3.bz2", - True, - ), + "/data/ana/CosmicRay/IceTop_level3/sim/IC86.2012/SIBYLL2.1/p/12360_v1s/Level3_IC86.2012_SIBYLL2.1_p_12360_E6.0_0.i3.bz2", ], "genie": [ - ("/data/sim/IceCubeUpgrade/genie/step3/141828/upgrade_genie_step3_141828_000000.i3.zst", True), - ( - "/data/sim/IceCube/2023/generated/GENIE/22590/0000000-0000999/GENIE_NuMu_IceCubeUpgrade_v58.22590.000000.i3.zst", - True, - ), - ("/data/ana/Software/simweights/test-data/genie_numu_volume_scaling.i3.zst", True), - ("/data/ana/Software/simweights/test-data/genie-icetray.140000A_000000.i3.zst", True), - ("/data/ana/Software/simweights/test-data/genie-icetray.140000B_000000.i3.zst", True), - ("/data/ana/Software/simweights/test-data/genie-icetray.140000C_000000.i3.zst", True), - ("/data/ana/Software/simweights/test-data/genie-icetray.140000D_000000.i3.zst", True), - ("/data/ana/Software/simweights/test-data/level2_genie-icetray.140000_000000.i3.zst", True), + "/data/sim/IceCubeUpgrade/genie/step3/141828/upgrade_genie_step3_141828_000000.i3.zst", + "/data/sim/IceCube/2023/generated/GENIE/22590/0000000-0000999/GENIE_NuMu_IceCubeUpgrade_v58.22590.000000.i3.zst", + "/data/ana/Software/simweights/test-data/genie_numu_volume_scaling.i3.zst", + "/data/ana/Software/simweights/test-data/genie-icetray.140000A_000000.i3.zst", + "/data/ana/Software/simweights/test-data/genie-icetray.140000B_000000.i3.zst", + "/data/ana/Software/simweights/test-data/genie-icetray.140000C_000000.i3.zst", + "/data/ana/Software/simweights/test-data/genie-icetray.140000D_000000.i3.zst", + "/data/ana/Software/simweights/test-data/level2_genie-icetray.140000_000000.i3.zst", ], } keys = { @@ -166,14 +88,13 @@ def fake_event_header(frame: dict) -> None: "icetop": ["I3TopInjectorInfo", 
"MCPrimary"], } - if "notemp" in sys.argv: outdir = Path("/scratch/kmeagher/simweights/") else: tempdir = tempfile.TemporaryDirectory(prefix="simweights_testdata_") outdir = Path(tempdir.name) -for simtype, (filename, split) in ((i, x) for i in filelist for x in filelist[i]): +for simtype, filename in ((i, x) for i in filelist for x in filelist[i]): basename = Path(filename).name.replace(".i3.zst", "").replace(".i3.bz2", "").replace(".i3.gz", "") assert basename != Path(filename).name outfile = outdir / basename @@ -184,30 +105,26 @@ def fake_event_header(frame: dict) -> None: tray = icetray.I3Tray() tray.Add("I3Reader", FileNameList=[filename]) - - if split: - tray.Add( - fake_event_header, - Streams=[icetray.I3Frame.DAQ], - If=lambda f: "I3EventHeader" not in f, - ) - tray.Add("I3NullSplitter", SubEventStreamName="NullSplit") + tray.Add( + fake_event_header, + Streams=[icetray.I3Frame.DAQ], + If=lambda f: "I3EventHeader" not in f, + ) + tray.Add("I3NullSplitter", SubEventStreamName="weight") tray.Add( tableio.I3TableWriter, tableservice=[ hdfwriter.I3HDFTableService(str(outfile) + ".hdf5"), rootwriter.I3ROOTTableService(str(outfile) + ".root"), ], - SubEventStreams=["NullSplit"], + SubEventStreams=["weight"], keys=keys[simtype], ) tray.Add("Keep", keys=["I3EventHeader"] + keys[simtype]) - tray.AddModule(lambda frame: frame["I3EventHeader"].sub_event_stream == "NullSplit") tray.Add( "I3Writer", Filename=str(outfile) + ".i3.zst", - Streams=[icetray.I3Frame.Simulation, icetray.I3Frame.DAQ, icetray.I3Frame.Physics], - DropOrphanStreams=[icetray.I3Frame.Physics], + Streams=[icetray.I3Frame.Simulation, icetray.I3Frame.DAQ], ) tray.Execute() From c965c9933f779f2239a7d9fb103a40d3c1a6db5b Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:21:57 -0600 Subject: [PATCH 09/20] Add unit tests for nugen i3files --- pyproject.toml | 5 +- src/simweights/_utils.py | 22 +-- tests/test_corsika_datasets.py | 39 ++++++ 
tests/test_genie_datasets.py | 2 +- tests/test_nugen_datasets.py | 2 +- tests/test_nugen_weighter.py | 249 +++++++++++++++++++++++---------- 6 files changed, 232 insertions(+), 87 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5a9e6bb..40139c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,7 @@ warn_unreachable = true max-line-length = "128" [tool.pylint.messages_control] -disable = "C0114,R0902,R0913,R0917,R0914" +disable = "C0114,R0902,R0913,R0917,R0914,R0911" [tool.pytest.ini_options] addopts = ["-ra", "--strict-config", "--strict-markers", "--cov=simweights", "-W ignore"] @@ -99,7 +99,8 @@ ignore = [ "S101", # assert-used "COM812", # conflicts with ruff formatter "ISC001", # conflicts with ruff formatter - "PLR0913" # Too many arguments in function definition + "PLR0913", # Too many arguments in function definition + "PLR0911" # Too many return statement ] select = ["ALL"] diff --git a/src/simweights/_utils.py b/src/simweights/_utils.py index 52d4709..d58f7fd 100644 --- a/src/simweights/_utils.py +++ b/src/simweights/_utils.py @@ -74,13 +74,13 @@ def has_column(table: Any, name: str) -> bool: """Helper function for determining if a table has a column, works with h5py, pytables, and pandas.""" if hasattr(table, "cols"): return hasattr(table.cols, name) - if hasattr(table, name): + if hasattr(table, "__getitem__"): + try: + get_column(table, name) + except (AttributeError, KeyError, ValueError, TypeError): + return False return True - try: - table[name] # pylint: disable=pointless-statement - return True # noqa: TRY300 - except (ValueError, KeyError): - return False + return False def get_column(table: Any, name: str) -> Any: @@ -88,11 +88,17 @@ def get_column(table: Any, name: str) -> Any: if hasattr(table, "cols"): return np.asarray(getattr(table.cols, name)[:], dtype=np.float64) if hasattr(table, name): - return np.atleast_1d(getattr(table, name)) + return np.asarray(np.atleast_1d(getattr(table, name)), dtype=np.float64) + if 
hasattr(table, "dir") and hasattr(table.dir, name): + return np.asarray(np.atleast_1d(getattr(table.dir, name)), dtype=np.float64) + if hasattr(table, "primary") and hasattr(table.primary, name): + return np.asarray(np.atleast_1d(getattr(table.primary, name)), dtype=np.float64) + if hasattr(table, "primary") and hasattr(table.primary.dir, name): + return np.asarray(np.atleast_1d(getattr(table.primary.dir, name)), dtype=np.float64) column = table[name] if hasattr(column, "array") and callable(column.array): return np.asarray(column.array(library="np"), dtype=np.float64) - return np.asarray(column, dtype=np.float64) + return np.asarray(np.atleast_1d(column), dtype=np.float64) def constcol(table: Any, colname: str, mask: Any = None) -> float: diff --git a/tests/test_corsika_datasets.py b/tests/test_corsika_datasets.py index feef9e3..fcf5768 100755 --- a/tests/test_corsika_datasets.py +++ b/tests/test_corsika_datasets.py @@ -4,6 +4,7 @@ # # SPDX-License-Identifier: BSD-2-Clause +import contextlib import os import sys from pathlib import Path @@ -18,6 +19,9 @@ from simweights import CorsikaWeighter, GaisserH4a from simweights._utils import constcol +with contextlib.suppress(ImportError): + from icecube import dataio, simclasses # noqa: F401 + flux = GaisserH4a() datadir = os.environ.get("SIMWEIGHTS_TESTDATA", None) if datadir: @@ -121,5 +125,40 @@ def test_dataset(triggered, fname, rate, loader): infile.close() +@pytest.mark.parametrize(("triggered", "fname", "rate"), datasets) +@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") +@pytest.mark.skipif("dataio" not in globals(), reason="Not in an IceTray environment") +def test_dataset_i3file(triggered, fname, rate): + fname = datadir / fname + + reffile = h5py.File(str(fname) + ".hdf5", "r") + if triggered: + nfiles = None + refweight = triggered_weights + counts = np.unique(reffile["I3PrimaryInjectorInfo"]["primary_type"], return_counts=True) + s_frame_counts = {counts[0][i]: 
counts[1][i] for i in range(len(counts[0]))} + else: + nfiles = 1 + refweight = untriggered_weights + s_frame_counts = {c: 1 for c in set(reffile["CorsikaWeightMap"]["PrimaryType"])} + + w0 = refweight(reffile) + f = dataio.I3File(str(fname) + ".i3.zst") + i = 0 + W = 0 + while f.more(): + frame = f.pop_frame() + if frame.Stop != frame.DAQ: + continue + ww = CorsikaWeighter(frame, nfiles) + pdgid = ww.get_weight_column("pdgid")[0] + w = ww.get_weights(flux) / s_frame_counts[pdgid] + assert w == pytest.approx(w0[i], 1e-6) + i += 1 + W += w + + assert rate == pytest.approx(W, 1e-6) + + if __name__ == "__main__": sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]])) diff --git a/tests/test_genie_datasets.py b/tests/test_genie_datasets.py index a22407d..7d41c05 100755 --- a/tests/test_genie_datasets.py +++ b/tests/test_genie_datasets.py @@ -119,7 +119,7 @@ def test_dataset(fname, loader): @pytest.mark.parametrize("fname", datasets) @pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") -@pytest.mark.skipif("dataio" not in globals(), reason="not in icetray environment") +@pytest.mark.skipif("dataio" not in globals(), reason="Not in an IceTray environment") def test_dataset_i3file(fname): filename = Path(datadir) / fname ref = load_reference_values(filename) diff --git a/tests/test_nugen_datasets.py b/tests/test_nugen_datasets.py index 454914d..06fc299 100755 --- a/tests/test_nugen_datasets.py +++ b/tests/test_nugen_datasets.py @@ -107,7 +107,7 @@ def test_dataset(fname, loader): @pytest.mark.parametrize("fname", datasets) @pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set") -@pytest.mark.skipif("dataio" not in globals(), reason="not in icetray environment") +@pytest.mark.skipif("dataio" not in globals(), reason="Not in an IceTray environment") def test_dataset_i3file(fname): d = load_reference_values(Path(datadir) / (fname + ".hdf5")) diff --git a/tests/test_nugen_weighter.py 
b/tests/test_nugen_weighter.py index 524f516..7a69fbb 100755 --- a/tests/test_nugen_weighter.py +++ b/tests/test_nugen_weighter.py @@ -4,12 +4,19 @@ # # SPDX-License-Identifier: BSD-2-Clause -import unittest +import contextlib +import sys import numpy as np import pandas as pd +import pytest +from numpy.testing import assert_allclose +from pytest import approx -from simweights import CircleInjector, NaturalRateCylinder, NuGenWeighter, PowerLaw +from simweights import CircleInjector, NuGenWeighter, PowerLaw, UniformSolidAngleCylinder + +with contextlib.suppress(ImportError): + from icecube import dataclasses, icetray base_keys = [ "NEvents", @@ -41,79 +48,171 @@ def make_new_table(pdgid, nevents, spatial, spectrum): return weight -class TestNugenWeighter(unittest.TestCase): - def test_nugen_energy_post_V6(self): - p1 = PowerLaw(0, 1e3, 1e4) - c1 = NaturalRateCylinder(200, 100, 0, 0.001) - t1 = pd.DataFrame(make_new_table(14, 10000, c1, p1)) - t1["CylinderHeight"] = c1.length - t1["CylinderRadius"] = c1.radius - t1["TypeWeight"] = 0.5 - - for weight in 0.1, 1, 10: - t1["TotalWeight"] = weight - f1 = {"I3MCWeightDict": t1} - for nfiles in [1, 10, 100]: - wf = NuGenWeighter(f1, nfiles=nfiles) - for flux in [1e-6, 1, 1e6]: - w1 = wf.get_weights(flux) - np.testing.assert_allclose( - w1.sum(), - 2 * weight * flux * p1.integral * c1.etendue / nfiles, - ) - E = t1["PrimaryNeutrinoEnergy"] - y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, 2 * weight * flux * Ewidth * c1.etendue / nfiles, 6e-3) - - def test_nugen_energy_pre_V6(self): - p1 = PowerLaw(0, 1e3, 1e4) - c1 = NaturalRateCylinder(1900, 950, 0, 0.001) - t1 = pd.DataFrame(make_new_table(14, 10000, c1, p1)) - for weight in 0.1, 1, 10: - t1["TotalWeight"] = weight - t1["InjectionSurfaceR"] = -1 - t1["TypeWeight"] = 0.5 - f1 = {"I3MCWeightDict": t1} - for nfiles in [1, 10, 100]: - wf = NuGenWeighter(f1, nfiles=nfiles) - for flux in [1e-6, 1, 1e6]: - 
w1 = wf.get_weights(flux) - np.testing.assert_allclose( - w1.sum(), - 2 * weight * flux * p1.integral * c1.etendue / nfiles, - ) - E = t1["PrimaryNeutrinoEnergy"] - y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, 2 * weight * flux * Ewidth * c1.etendue / nfiles, 6e-3) - - def test_nugen_energy_pre_V04_00(self): - p1 = PowerLaw(0, 1e3, 1e4) - c1 = CircleInjector(500, 0, 0.001) - t1 = pd.DataFrame(make_new_table(14, 10000, c1, p1)) - for weight in 0.1, 1, 10: - t1["TotalInteractionProbabilityWeight"] = weight - t1["InjectionSurfaceR"] = c1.radius - f1 = {"I3MCWeightDict": t1} - for nfiles in [1, 10, 100]: - wf = NuGenWeighter(f1, nfiles=nfiles) - for flux in [1e-6, 1, 1e6]: - w1 = wf.get_weights(flux) - np.testing.assert_allclose( - w1.sum(), - 2 * weight * flux * p1.integral * c1.etendue / nfiles, - ) - E = t1["PrimaryNeutrinoEnergy"] - y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, 2 * weight * flux * Ewidth * c1.etendue / nfiles, 6e-3) - - def test_empty(self): - with self.assertRaises(RuntimeError): - x = {"I3MCWeightDict": {"PrimaryNeutrinoType": []}} - NuGenWeighter(x, nfiles=1) +@pytest.mark.parametrize("weight", (0.1, 1, 10)) +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +def test_nugen_energy_post_V6(weight, nfiles, flux): + p1 = PowerLaw(0, 1e3, 1e4) + c1 = UniformSolidAngleCylinder(200, 100, 0, 0.001) + t1 = pd.DataFrame(make_new_table(14, 10000, c1, p1)) + t1["CylinderHeight"] = c1.length + t1["CylinderRadius"] = c1.radius + t1["TypeWeight"] = 0.5 + t1["TotalWeight"] = weight + f1 = {"I3MCWeightDict": t1} + wf = NuGenWeighter(f1, nfiles=nfiles) + + w1 = wf.get_weights(flux) + assert_allclose( + w1[1:], + flux + * weight + / ( + t1["TypeWeight"] + * t1["NEvents"] + * nfiles + * c1.pdf(np.cos(t1["PrimaryNeutrinoZenith"])) + * 
p1.pdf(t1["PrimaryNeutrinoEnergy"]) + )[1:], + ) + assert w1.sum() == approx(2 * weight * flux * p1.integral * c1.etendue / nfiles) + E = t1["PrimaryNeutrinoEnergy"] + y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + assert y == approx(2 * weight * flux * Ewidth * c1.etendue / nfiles, 6e-3) + + +@pytest.mark.parametrize("weight", (0.1, 1, 10)) +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +def test_nugen_energy_pre_V6(weight, nfiles, flux): + p1 = PowerLaw(0, 1e3, 1e4) + c1 = UniformSolidAngleCylinder(1900, 950, 0, 0.001) + t1 = pd.DataFrame(make_new_table(14, 10000, c1, p1)) + t1["TotalWeight"] = weight + t1["InjectionSurfaceR"] = -1 + t1["TypeWeight"] = 0.5 + f1 = {"I3MCWeightDict": t1} + wf = NuGenWeighter(f1, nfiles=nfiles) + w1 = wf.get_weights(flux) + assert_allclose( + w1[1:], + flux + * weight + / ( + t1["TypeWeight"] + * t1["NEvents"] + * nfiles + * c1.pdf(np.cos(t1["PrimaryNeutrinoZenith"])) + * p1.pdf(t1["PrimaryNeutrinoEnergy"]) + )[1:], + ) + assert w1.sum() == approx( + 2 * weight * flux * p1.integral * c1.etendue / nfiles, + ) + E = t1["PrimaryNeutrinoEnergy"] + y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + assert y == approx(2 * weight * flux * Ewidth * c1.etendue / nfiles, 6e-3) + + +@pytest.mark.parametrize("weight", (0.1, 1, 10)) +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +def test_nugen_energy_pre_V04_00(weight, nfiles, flux): + p1 = PowerLaw(0, 1e3, 1e4) + c1 = CircleInjector(500, 0, 0.001) + t1 = pd.DataFrame(make_new_table(14, 10000, c1, p1)) + t1["TotalInteractionProbabilityWeight"] = weight + t1["InjectionSurfaceR"] = c1.radius + f1 = {"I3MCWeightDict": t1} + wf = NuGenWeighter(f1, nfiles=nfiles) + w1 = wf.get_weights(flux) + + assert_allclose( + w1[1:], + flux + * weight + / (0.5 * t1["NEvents"] * nfiles * 
c1.pdf(np.cos(t1["PrimaryNeutrinoZenith"])) * p1.pdf(t1["PrimaryNeutrinoEnergy"]))[ + 1: + ], + ) + assert w1.sum() == approx( + 2 * weight * flux * p1.integral * c1.etendue / nfiles, + ) + E = t1["PrimaryNeutrinoEnergy"] + y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + assert y == approx(2 * weight * flux * Ewidth * c1.etendue / nfiles, 6e-3) + + +def test_empty(): + with pytest.raises(RuntimeError): + x = {"I3MCWeightDict": {"PrimaryNeutrinoType": []}} + NuGenWeighter(x, nfiles=1) + + +@pytest.mark.parametrize("weight", (0.1, 1, 10)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") +def test_i3file_pre_V04_00(weight, flux): + p1 = PowerLaw(0, 1e3, 1e4) + c1 = CircleInjector(500, 0, 1) + t1 = make_new_table(14, 1, c1, p1) + mcw = dataclasses.I3MapStringDouble({n: t1[n][0] for n in t1.dtype.names}) + mcw["TotalInteractionProbabilityWeight"] = weight + mcw["InjectionSurfaceR"] = c1.radius + f1 = icetray.I3Frame() + f1["I3MCWeightDict"] = mcw + wf = NuGenWeighter(f1, nfiles=1) + w1 = wf.get_weights(flux) + assert w1 == approx( + flux * weight / (0.5 * c1.pdf(np.cos(mcw["PrimaryNeutrinoZenith"])) * p1.pdf(mcw["PrimaryNeutrinoEnergy"])) + ) + + +@pytest.mark.parametrize("weight", (0.1, 1, 10)) +@pytest.mark.parametrize("typeweight", (0.3, 0.5, 0.7)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") +def test_i3file_pre_V6(weight, flux, typeweight): + p1 = PowerLaw(0, 1e3, 1e4) + c1 = UniformSolidAngleCylinder(1900, 950, 0, 1) + t1 = make_new_table(14, 1, c1, p1) + mcw = dataclasses.I3MapStringDouble({n: t1[n][0] for n in t1.dtype.names}) + mcw["TotalWeight"] = weight + mcw["InjectionSurfaceR"] = -1 + mcw["TypeWeight"] = typeweight + f1 = icetray.I3Frame() + f1["I3MCWeightDict"] = mcw + wf = NuGenWeighter(f1, nfiles=1) + w1 = wf.get_weights(flux) + 
assert w1 == approx( + flux * weight / (typeweight * c1.pdf(np.cos(mcw["PrimaryNeutrinoZenith"])) * p1.pdf(mcw["PrimaryNeutrinoEnergy"])) + ) + + +@pytest.mark.parametrize("weight", (0.1, 1, 10)) +@pytest.mark.parametrize("typeweight", (0.3, 0.5, 0.7)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") +def test_i3file_post_V6(weight, flux, typeweight): + p1 = PowerLaw(0, 1e3, 1e4) + c1 = UniformSolidAngleCylinder(200, 100, 0, 0.1) + t1 = make_new_table(14, 1, c1, p1) + mcw = dataclasses.I3MapStringDouble({n: t1[n][0] for n in t1.dtype.names}) + mcw["CylinderHeight"] = c1.length + mcw["CylinderRadius"] = c1.radius + mcw["TypeWeight"] = typeweight + mcw["TotalWeight"] = weight + f1 = icetray.I3Frame() + f1["I3MCWeightDict"] = mcw + wf = NuGenWeighter(f1, nfiles=1) + w1 = wf.get_weights(flux) + assert w1 == approx( + flux * weight / (typeweight * c1.pdf(np.cos(mcw["PrimaryNeutrinoZenith"])) * p1.pdf(mcw["PrimaryNeutrinoEnergy"])) + ) if __name__ == "__main__": - unittest.main() + sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]])) From 4db6db19daeb2edf3aebfedef89c0036d5b6116c Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:37:02 -0600 Subject: [PATCH 10/20] convert test_corsika_weighter to pytest --- tests/test_corsika_weighter.py | 274 +++++++++++++++++---------------- 1 file changed, 138 insertions(+), 136 deletions(-) diff --git a/tests/test_corsika_weighter.py b/tests/test_corsika_weighter.py index f18ff8c..73dbb22 100755 --- a/tests/test_corsika_weighter.py +++ b/tests/test_corsika_weighter.py @@ -4,9 +4,10 @@ # # SPDX-License-Identifier: BSD-2-Clause -import unittest +import sys import numpy as np +import pytest from scipy.interpolate import interp1d import simweights @@ -74,141 +75,142 @@ def make_corsika_data(pdgid, nevents, c, p): return {"CorsikaWeightMap": weight, "PolyplopiaPrimary": primary} 
-class TestCorsikaWeighter(unittest.TestCase): - def test_old_corsika(self): - c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) - p1 = simweights.PowerLaw(0, 1e3, 1e4) - d = make_corsika_data(2212, 10000, c1, p1) - - for oversampling in [1, 5, 50]: - d["CorsikaWeightMap"]["OverSampling"] = oversampling - for nfiles in [1, 10, 100]: - wobj = CorsikaWeighter(d, nfiles=nfiles) - - for flux in [0.1, 1, 10]: - w = wobj.get_weights(flux) - np.testing.assert_allclose( - w.sum(), - flux * c1.etendue * p1.integral / nfiles / oversampling, - ) - E = d["PolyplopiaPrimary"]["energy"] - y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / nfiles / oversampling, 5e-3) - - with self.assertRaises(RuntimeError): - CorsikaWeighter(d) - - with self.assertRaises(TypeError): - CorsikaWeighter(d, nfiles=object()) - - with self.assertRaises(RuntimeError): - x = {"CorsikaWeightMap": {"ParticleType": []}, "PolyplopiaPrimary": {}} - CorsikaWeighter(x, nfiles=1) - - def test_sframe_corsika(self): - c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) - p1 = simweights.PowerLaw(0, 1e3, 1e4) - d = make_corsika_data(2212, 10000, c1, p1) - for oversampling in [1, 5, 50]: - for nfiles in [1, 10, 100]: - rows = nfiles * [ - ( - 10000, - 2212, - oversampling, - c1.length, - c1.radius, - np.arccos(c1.cos_zen_max), - np.arccos(c1.cos_zen_min), - p1.a, - p1.b, - p1.g, - ), - ] - d["I3CorsikaInfo"] = np.array(rows, dtype=info_dtype) - wobj = CorsikaWeighter(d) - - for flux in [0.1, 1, 10]: - w = wobj.get_weights(flux) - np.testing.assert_allclose( - w.sum(), - flux * c1.etendue * p1.integral / nfiles / oversampling, - ) - E = d["PolyplopiaPrimary"]["energy"] - y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / nfiles / oversampling, 5e-3) - - with self.assertWarns(UserWarning): - CorsikaWeighter(d, 
nfiles=10) - - def test_triggered_corsika(self): - weight_dtype = [ - ("type", np.int32), - ("energy", np.float64), - ("zenith", np.float64), - ("weight", np.float64), - ] - info_dtype = [ - ("primary_type", np.int32), - ("n_events", np.int32), - ("cylinder_height", np.float64), - ("cylinder_radius", np.float64), - ("min_zenith", np.float64), - ("max_zenith", np.float64), - ("min_energy", np.float64), - ("max_energy", np.float64), - ("power_law_index", np.float64), - ] - - nevents = 10000 - c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) - p1 = simweights.PowerLaw(0, 1e3, 1e4) - d = make_corsika_data(2212, 10000, c1, p1) - weight = np.zeros(nevents, dtype=weight_dtype) - weight["type"] = 2212 - weight["zenith"] = np.arccos(get_cos_zenith_dist(c1, nevents)) - weight["energy"] = p1.ppf(np.linspace(0, 1, nevents)) - - for event_weight in [1e-6, 1e-3, 1]: - weight["weight"] = event_weight - - for nfiles in [1, 5, 50]: - rows = nfiles * [ - ( - 2212, - nevents, - c1.length, - c1.radius, - np.arccos(c1.cos_zen_max), - np.arccos(c1.cos_zen_min), - p1.a, - p1.b, - p1.g, - ), - ] - info = np.array(rows, dtype=info_dtype) - d = {"I3CorsikaWeight": weight, "I3PrimaryInjectorInfo": info} - - for flux in [0.1, 1, 10]: - wobj = CorsikaWeighter(d) - w = wobj.get_weights(flux) - np.testing.assert_allclose( - w.sum(), - flux * event_weight * c1.etendue * p1.integral / nfiles, - ) - E = d["I3CorsikaWeight"]["energy"] - y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, flux * event_weight * Ewidth * c1.etendue / nfiles, 5e-3) - - with self.assertRaises(RuntimeError): - CorsikaWeighter(d, nfiles=10) - - with self.assertRaises(RuntimeError): - CorsikaWeighter({"I3CorsikaWeight": weight}) +@pytest.mark.parametrize("oversampling", (1, 5, 50)) +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_old_corsika(oversampling, nfiles, flux): + c1 = 
simweights.NaturalRateCylinder(1200, 600, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + d = make_corsika_data(2212, 10000, c1, p1) + + d["CorsikaWeightMap"]["OverSampling"] = oversampling + wobj = CorsikaWeighter(d, nfiles=nfiles) + + w = wobj.get_weights(flux) + np.testing.assert_allclose( + w.sum(), + flux * c1.etendue * p1.integral / nfiles / oversampling, + ) + E = d["PolyplopiaPrimary"]["energy"] + y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / nfiles / oversampling, 5e-3) + + with pytest.raises(RuntimeError): + CorsikaWeighter(d) + + with pytest.raises(TypeError): + CorsikaWeighter(d, nfiles=object()) + + with pytest.raises(RuntimeError): + x = {"CorsikaWeightMap": {"ParticleType": []}, "PolyplopiaPrimary": {}} + CorsikaWeighter(x, nfiles=1) + + +@pytest.mark.parametrize("oversampling", (1, 5, 50)) +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_sframe_corsika(oversampling, nfiles, flux): + c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + d = make_corsika_data(2212, 10000, c1, p1) + rows = nfiles * [ + ( + 10000, + 2212, + oversampling, + c1.length, + c1.radius, + np.arccos(c1.cos_zen_max), + np.arccos(c1.cos_zen_min), + p1.a, + p1.b, + p1.g, + ), + ] + d["I3CorsikaInfo"] = np.array(rows, dtype=info_dtype) + wobj = CorsikaWeighter(d) + + w = wobj.get_weights(flux) + np.testing.assert_allclose( + w.sum(), + flux * c1.etendue * p1.integral / nfiles / oversampling, + ) + E = d["PolyplopiaPrimary"]["energy"] + y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / nfiles / oversampling, 5e-3) + + with pytest.warns(UserWarning): + CorsikaWeighter(d, nfiles=10) + + +@pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1)) +@pytest.mark.parametrize("nfiles", (1, 5, 
50)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_triggered_corsika(event_weight, nfiles, flux): + weight_dtype = [ + ("type", np.int32), + ("energy", np.float64), + ("zenith", np.float64), + ("weight", np.float64), + ] + info_dtype = [ + ("primary_type", np.int32), + ("n_events", np.int32), + ("cylinder_height", np.float64), + ("cylinder_radius", np.float64), + ("min_zenith", np.float64), + ("max_zenith", np.float64), + ("min_energy", np.float64), + ("max_energy", np.float64), + ("power_law_index", np.float64), + ] + + nevents = 10000 + c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + d = make_corsika_data(2212, 10000, c1, p1) + weight = np.zeros(nevents, dtype=weight_dtype) + weight["type"] = 2212 + weight["zenith"] = np.arccos(get_cos_zenith_dist(c1, nevents)) + weight["energy"] = p1.ppf(np.linspace(0, 1, nevents)) + + weight["weight"] = event_weight + + rows = nfiles * [ + ( + 2212, + nevents, + c1.length, + c1.radius, + np.arccos(c1.cos_zen_max), + np.arccos(c1.cos_zen_min), + p1.a, + p1.b, + p1.g, + ), + ] + info = np.array(rows, dtype=info_dtype) + d = {"I3CorsikaWeight": weight, "I3PrimaryInjectorInfo": info} + + wobj = CorsikaWeighter(d) + w = wobj.get_weights(flux) + np.testing.assert_allclose( + w.sum(), + flux * event_weight * c1.etendue * p1.integral / nfiles, + ) + E = d["I3CorsikaWeight"]["energy"] + y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + np.testing.assert_allclose(y, flux * event_weight * Ewidth * c1.etendue / nfiles, 5e-3) + + with pytest.raises(RuntimeError): + CorsikaWeighter(d, nfiles=10) + + with pytest.raises(RuntimeError): + CorsikaWeighter({"I3CorsikaWeight": weight}) if __name__ == "__main__": - unittest.main() + sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]])) From e6910da0ef2adc76ee5ee8be132196a3d5d4f631 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:05:57 
-0600 Subject: [PATCH 11/20] unittests for corsika i3files --- src/simweights/_utils.py | 14 ++--- tests/test_corsika_weighter.py | 98 ++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 9 deletions(-) diff --git a/src/simweights/_utils.py b/src/simweights/_utils.py index d58f7fd..de16b94 100644 --- a/src/simweights/_utils.py +++ b/src/simweights/_utils.py @@ -72,15 +72,11 @@ def get_table(file_obj: Any, name: str) -> Any: def has_column(table: Any, name: str) -> bool: """Helper function for determining if a table has a column, works with h5py, pytables, and pandas.""" - if hasattr(table, "cols"): - return hasattr(table.cols, name) - if hasattr(table, "__getitem__"): - try: - get_column(table, name) - except (AttributeError, KeyError, ValueError, TypeError): - return False - return True - return False + try: + get_column(table, name) + except (AttributeError, KeyError, ValueError, TypeError): + return False + return True def get_column(table: Any, name: str) -> Any: diff --git a/tests/test_corsika_weighter.py b/tests/test_corsika_weighter.py index 73dbb22..ec200e2 100755 --- a/tests/test_corsika_weighter.py +++ b/tests/test_corsika_weighter.py @@ -4,15 +4,20 @@ # # SPDX-License-Identifier: BSD-2-Clause +import contextlib import sys import numpy as np import pytest +from pytest import approx from scipy.interpolate import interp1d import simweights from simweights import CorsikaWeighter +with contextlib.suppress(ImportError): + from icecube import dataclasses, icetray, simclasses + info_dtype = [ ("n_events", np.int32), ("primary_type", np.int32), @@ -212,5 +217,98 @@ def test_triggered_corsika(event_weight, nfiles, flux): CorsikaWeighter({"I3CorsikaWeight": weight}) +@pytest.mark.parametrize("oversampling", (1, 5, 50)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") +def test_old_corsika_i3file(oversampling, flux): + c1 = simweights.NaturalRateCylinder(1200, 
600, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + d = make_corsika_data(2212, 1, c1, p1) + d["CorsikaWeightMap"]["OverSampling"] = oversampling + + cwm = d["CorsikaWeightMap"] + wm = dataclasses.I3MapStringDouble({k: float(cwm[k][0]) for k in cwm.dtype.names}) + pp = dataclasses.I3Particle() + pp.type = pp.ParticleType(d["PolyplopiaPrimary"]["type"][0]) + pp.energy = d["PolyplopiaPrimary"]["energy"][0] + pp.dir = dataclasses.I3Direction(d["PolyplopiaPrimary"]["zenith"][0], 0) + frame = icetray.I3Frame() + frame["CorsikaWeightMap"] = wm + frame["PolyplopiaPrimary"] = pp + wobj = CorsikaWeighter(frame, nfiles=1) + w = wobj.get_weights(flux) + assert w == approx(flux / c1.pdf(np.cos(pp.dir.zenith)) / p1.pdf(pp.energy) / oversampling) + + +@pytest.mark.parametrize("oversampling", (1, 10, 100, 1000)) +@pytest.mark.parametrize("n_events", (1, 10, 100)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_sframe_corsika_i3files(oversampling, n_events, flux): + c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + d = make_corsika_data(2212, 1, c1, p1) + + info = simclasses.I3CorsikaInfo() + info.n_events = n_events + info.primary_type = dataclasses.I3Particle.ParticleType(2212) + info.oversampling = oversampling + info.cylinder_height = c1.length + info.cylinder_radius = c1.radius + info.min_zenith = np.arccos(c1.cos_zen_max) + info.max_zenith = np.arccos(c1.cos_zen_min) + info.power_law_index = p1.g + info.min_energy = p1.a + info.max_energy = p1.b + + pp = dataclasses.I3Particle() + pp.type = pp.ParticleType(d["PolyplopiaPrimary"]["type"][0]) + pp.energy = d["PolyplopiaPrimary"]["energy"][0] + pp.dir = dataclasses.I3Direction(d["PolyplopiaPrimary"]["zenith"][0], 0) + + frame = icetray.I3Frame() + frame["I3CorsikaInfo"] = info + frame["PolyplopiaPrimary"] = pp + w = CorsikaWeighter(frame).get_weights(flux) + assert w == approx(flux / c1.pdf(np.cos(pp.dir.zenith)) / p1.pdf(pp.energy) / n_events / oversampling) + + 
+@pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1)) +@pytest.mark.parametrize("nevents", (1, 5, 50)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_triggered_corsika_i3file(event_weight, nevents, flux): + c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + d = make_corsika_data(2212, 1, c1, p1) + + primary = dataclasses.I3Particle() + primary.type = primary.ParticleType(2212) + primary.dir = dataclasses.I3Direction(d["PolyplopiaPrimary"]["zenith"][0], 0) + primary.energy = d["PolyplopiaPrimary"]["energy"][0] + weight = simclasses.I3CorsikaWeight() + weight.primary = primary + weight.weight = event_weight + + # you can't set values for I3PrimaryInjectorInfo in python so lets just fake it + info = dataclasses.I3MapStringDouble( + { + "n_events": nevents, + "primary_type": 2212, + "cylinder_height": c1.length, + "cylinder_radius": c1.radius, + "min_zenith": np.arccos(c1.cos_zen_max), + "max_zenith": np.arccos(c1.cos_zen_min), + "min_energy": p1.a, + "max_energy": p1.b, + "power_law_index": p1.g, + } + ) + frame = icetray.I3Frame() + frame["I3CorsikaWeight"] = weight + frame["I3PrimaryInjectorInfo"] = info + + w = CorsikaWeighter(frame).get_weights(flux) + assert w == approx(flux * event_weight * c1.etendue * p1.integral / nevents) + + if __name__ == "__main__": sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]])) From 555ba94013518b4650859690abdb0466374f8ebd Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:08:14 -0600 Subject: [PATCH 12/20] don't run icetray tests unless icetray env also utils test --- tests/test_corsika_weighter.py | 2 ++ tests/test_util.py | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/tests/test_corsika_weighter.py b/tests/test_corsika_weighter.py index ec200e2..a0e302c 100755 --- a/tests/test_corsika_weighter.py +++ b/tests/test_corsika_weighter.py @@ -243,6 +243,7 @@ def
test_old_corsika_i3file(oversampling, flux): @pytest.mark.parametrize("oversampling", (1, 10, 100, 1000)) @pytest.mark.parametrize("n_events", (1, 10, 100)) @pytest.mark.parametrize("flux", (0.1, 1, 10)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") def test_sframe_corsika_i3files(oversampling, n_events, flux): c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) p1 = simweights.PowerLaw(0, 1e3, 1e4) @@ -275,6 +276,7 @@ def test_sframe_corsika_i3files(oversampling, n_events, flux): @pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1)) @pytest.mark.parametrize("nevents", (1, 5, 50)) @pytest.mark.parametrize("flux", (0.1, 1, 10)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") def test_triggered_corsika_i3file(event_weight, nevents, flux): c1 = simweights.NaturalRateCylinder(1200, 600, 0, 1) p1 = simweights.PowerLaw(0, 1e3, 1e4) diff --git a/tests/test_util.py b/tests/test_util.py index de7ff3e..fdd86ef 100755 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -32,6 +32,8 @@ def test_table_and_column(self): f1 = SimpleNamespace(root=SimpleNamespace(x=t1)) t2 = {"a": np.full(10, 7), "b": np.arange(10)} f2 = {"x": t2} + p1 = SimpleNamespace(energy=1e5, dir=SimpleNamespace(zenith=1.57)) + w1 = SimpleNamespace(primary=p1) self.assertEqual(has_table(f1, "x"), True) self.assertEqual(has_table(f1, "y"), False) @@ -48,8 +50,16 @@ def test_table_and_column(self): self.assertEqual(has_column(t1, "a"), True) self.assertEqual(has_column(t1, "b"), True) self.assertEqual(has_column(t1, "c"), False) + self.assertEqual(has_column(p1, "energy"), True) + self.assertEqual(has_column(p1, "zenith"), True) + self.assertEqual(has_column(w1, "energy"), True) + self.assertEqual(has_column(w1, "zenith"), True) assert_array_equal(get_column(t1, "a"), 10 * [3]) assert_array_equal(get_column(t1, "b"), 5 * [3] + 5 * [4]) + assert_array_equal(get_column(p1, "energy"), [1e5]) + 
assert_array_equal(get_column(p1, "zenith"), [1.57]) + assert_array_equal(get_column(w1, "energy"), [1e5]) + assert_array_equal(get_column(w1, "zenith"), [1.57]) with self.assertRaises(AttributeError): get_column(t1, "c") From c4b51de99d1ab9841fa3a742e92be09e581f8ca6 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:32:31 -0600 Subject: [PATCH 13/20] i3file for test_icetop_weighter --- tests/test_icetop_weighter.py | 129 +++++++++++++++++++++------------- 1 file changed, 82 insertions(+), 47 deletions(-) diff --git a/tests/test_icetop_weighter.py b/tests/test_icetop_weighter.py index 55f61c0..a982f32 100755 --- a/tests/test_icetop_weighter.py +++ b/tests/test_icetop_weighter.py @@ -4,12 +4,18 @@ # # SPDX-License-Identifier: BSD-2-Clause -import unittest +import contextlib +import sys import numpy as np +import pytest +from pytest import approx import simweights +with contextlib.suppress(ImportError): + from icecube import dataclasses, icetray, simclasses + info_dtype = [ ("primary_type", np.int32), ("n_events", np.int32), @@ -24,52 +30,81 @@ particle_dtype = [("type", np.int32), ("energy", np.float64), ("zenith", np.float64)] -class TestIceTopWeighter(unittest.TestCase): - def test_icetop_corsika(self): - nevents = 10000 - pdgid = 12 - c1 = simweights.NaturalRateCylinder(0, 300, 0, 1) - p1 = simweights.PowerLaw(0, 1e3, 1e4) - - weight = np.zeros(nevents, dtype=particle_dtype) - weight["type"] = pdgid - weight["energy"] = p1.ppf(np.linspace(0, 1, nevents)) - weight["zenith"] = np.arccos(np.linspace(c1.cos_zen_min, c1.cos_zen_max, nevents)) - - for nfiles in [1, 5, 50]: - rows = nfiles * [ - ( - pdgid, - nevents, - c1.radius, - np.arccos(c1.cos_zen_max), - np.arccos(c1.cos_zen_min), - p1.a, - p1.b, - p1.g, - ), - ] - info = np.array(rows, dtype=info_dtype) - d = {"MCPrimary": weight, "I3TopInjectorInfo": info} - - for flux in [0.1, 1, 10]: - wobj = simweights.IceTopWeighter(d) - w = 
wobj.get_weights(flux) - np.testing.assert_allclose(w.sum(), flux * c1.etendue * p1.integral / nfiles) - E = d["MCPrimary"]["energy"] - y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / nfiles, 5e-3) - - with self.assertRaises(TypeError): - simweights.IceTopWeighter(d, nfiles=10) - - with self.assertRaises(KeyError): - simweights.IceTopWeighter({"MCParticle": weight}) - - with self.assertRaises(KeyError): - simweights.IceTopWeighter({"I3TopInjectorInfo": info}) +@pytest.mark.parametrize("nfiles", (1, 5, 50)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_icetop_corsika(nfiles, flux): + nevents = 10000 + pdgid = 12 + c1 = simweights.NaturalRateCylinder(0, 300, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + + weight = np.zeros(nevents, dtype=particle_dtype) + weight["type"] = pdgid + weight["energy"] = p1.ppf(np.linspace(0, 1, nevents)) + weight["zenith"] = np.arccos(np.linspace(c1.cos_zen_min, c1.cos_zen_max, nevents)) + + rows = nfiles * [ + ( + pdgid, + nevents, + c1.radius, + np.arccos(c1.cos_zen_max), + np.arccos(c1.cos_zen_min), + p1.a, + p1.b, + p1.g, + ), + ] + info = np.array(rows, dtype=info_dtype) + d = {"MCPrimary": weight, "I3TopInjectorInfo": info} + + wobj = simweights.IceTopWeighter(d) + w = wobj.get_weights(flux) + np.testing.assert_allclose(w.sum(), flux * c1.etendue * p1.integral / nfiles) + E = d["MCPrimary"]["energy"] + y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / nfiles, 5e-3) + + with pytest.raises(TypeError): + simweights.IceTopWeighter(d, nfiles=10) + + with pytest.raises(KeyError): + simweights.IceTopWeighter({"MCParticle": weight}) + + with pytest.raises(KeyError): + simweights.IceTopWeighter({"I3TopInjectorInfo": info}) + + +@pytest.mark.parametrize("nevents", (1000, 10000, 100000)) +@pytest.mark.parametrize("flux", (0.1, 1, 
10)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") +def test_icetop_corsika_i3files(nevents, flux): + pdgid = 12 + c1 = simweights.NaturalRateCylinder(0, 300, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + + info = simclasses.I3TopInjectorInfo() + info.n_events = nevents + info.primary_type = dataclasses.I3Particle.ParticleType(pdgid) + info.sampling_radius = c1.radius + info.min_zenith = np.arccos(c1.cos_zen_max) + info.max_zenith = np.arccos(c1.cos_zen_min) + info.power_law_index = p1.g + info.min_energy = p1.a + info.max_energy = p1.b + + primary = dataclasses.I3Particle() + primary.type = primary.ParticleType(pdgid) + primary.energy = p1.a + primary.dir = dataclasses.I3Direction(np.arccos(c1.cos_zen_max), 0) + + frame = icetray.I3Frame() + frame["I3TopInjectorInfo"] = info + frame["MCPrimary"] = primary + w = simweights.IceTopWeighter(frame).get_weights(flux) + assert w == approx(flux / c1.pdf(c1.cos_zen_max) / p1.pdf(primary.energy) / nevents) if __name__ == "__main__": - unittest.main() + sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]])) From 7c00c557d4afac8ec499c649ac469396f633e498 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:21:48 -0600 Subject: [PATCH 14/20] tests for genie i3files --- .pre-commit-config.yaml | 2 +- src/simweights/_genie_weighter.py | 4 +- src/simweights/_nugen_weighter.py | 20 ++-- tests/test_genie_icetray_weighter.py | 105 ++++++++++++----- tests/test_genie_weighter.py | 166 +++++++++++++++++---------- 5 files changed, 192 insertions(+), 105 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fcec4b6..5f0c22f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: exclude: ^contrib/ additional_dependencies: [numpy, pandas] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.0 + rev: v0.9.1 hooks: - id: ruff args: [--fix, --show-fixes] diff 
--git a/src/simweights/_genie_weighter.py b/src/simweights/_genie_weighter.py index 1c92a6e..a07c3ac 100644 --- a/src/simweights/_genie_weighter.py +++ b/src/simweights/_genie_weighter.py @@ -46,8 +46,8 @@ def genie_icetray_surface( (pid, _, _, _, _) = row mask = np.all(gen_schemes == row[None, :], axis=1) - spatial = nugen_spatial(mcweightdict[mask]) - spectrum = nugen_spectrum(mcweightdict[mask]) + spatial = nugen_spatial(mcweightdict, mask) + spectrum = nugen_spectrum(mcweightdict, mask) type_weight = nufraction if pid > 0 else 1 - nufraction n_events = type_weight * constcol(mcweightdict, "NEvents", mask) diff --git a/src/simweights/_nugen_weighter.py b/src/simweights/_nugen_weighter.py index 299b94e..9f53ba6 100644 --- a/src/simweights/_nugen_weighter.py +++ b/src/simweights/_nugen_weighter.py @@ -14,20 +14,20 @@ from ._weighter import Weighter -def nugen_spatial(table: Any) -> SpatialDist: +def nugen_spatial(table: Any, mask: Any = None) -> SpatialDist: """Inspect the ``I3MCWeightDict`` table of a nugen file to generate the spatial distribution. It will either return a CircleInjector or UniformSolidAngleCylinder depending on how the dataset was generated. """ - max_cos = np.cos(constcol(table, "MinZenith")) - min_cos = np.cos(constcol(table, "MaxZenith")) + max_cos = np.cos(constcol(table, "MinZenith", mask)) + min_cos = np.cos(constcol(table, "MaxZenith", mask)) # Before V04-01-00, nugen injection primaries on the surface of a circle perpendicular to the momentum # vector of the primary, this can be determined by checking `InjectionSurfaceR`. It will # be > 0 for circle injection and -1 for surface injection. 
In new versions >V6-00-00 it is not even # present indicating surface mode - injection_radius = constcol(table, "InjectionSurfaceR") if has_column(table, "InjectionSurfaceR") else -1 + injection_radius = constcol(table, "InjectionSurfaceR", mask) if has_column(table, "InjectionSurfaceR") else -1 if injection_radius > 0: return CircleInjector(injection_radius, min_cos, max_cos, "cos_zen") @@ -35,18 +35,18 @@ def nugen_spatial(table: Any) -> SpatialDist: # Surface mode was added in V04-01-00 but the cylinder size was hard coded, `CylinderHeight` and # `CylinderRadius` were added after later V06-00-00. If they are not in the table then use the # hardcoded values - cylinder_height = constcol(table, "CylinderHeight") if has_column(table, "CylinderHeight") else 1900 - cylinder_radius = constcol(table, "CylinderRadius") if has_column(table, "CylinderRadius") else 950 + cylinder_height = constcol(table, "CylinderHeight", mask) if has_column(table, "CylinderHeight") else 1900 + cylinder_radius = constcol(table, "CylinderRadius", mask) if has_column(table, "CylinderRadius") else 950 return UniformSolidAngleCylinder(cylinder_height, cylinder_radius, min_cos, max_cos, "cos_zen") -def nugen_spectrum(table: Any) -> PowerLaw: +def nugen_spectrum(table: Any, mask: Any = None) -> PowerLaw: """Inspect the ``I3MCWeightDict`` table of a nugen file to generate to represent the energy spectrum.""" - min_energy = 10 ** constcol(table, "MinEnergyLog") - max_energy = 10 ** constcol(table, "MaxEnergyLog") + min_energy = 10 ** constcol(table, "MinEnergyLog", mask) + max_energy = 10 ** constcol(table, "MaxEnergyLog", mask) # the energy spectrum is always powerlaw however nugen uses positive value of `PowerLawIndex` # for negative slopes ie +2 means E**-2 injection spectrum - power_law_index = -constcol(table, "PowerLawIndex") + power_law_index = -constcol(table, "PowerLawIndex", mask) assert power_law_index <= 0 return PowerLaw(power_law_index, min_energy, max_energy, "energy") diff --git 
a/tests/test_genie_icetray_weighter.py b/tests/test_genie_icetray_weighter.py index a318c0b..75670a8 100755 --- a/tests/test_genie_icetray_weighter.py +++ b/tests/test_genie_icetray_weighter.py @@ -4,13 +4,19 @@ # # SPDX-License-Identifier: BSD-2-Clause -import unittest +import contextlib +import sys import numpy as np import pandas as pd +import pytest +from pytest import approx from simweights import CircleInjector, GenieWeighter, PowerLaw +with contextlib.suppress(ImportError): + from icecube import dataclasses, icetray, simclasses + mcwd_keys = [ "NEvents", "MinZenith", @@ -52,41 +58,80 @@ def make_new_table(pdgid, nevents, spatial, spectrum, coszen): return weight, resultdict -class TestGenieIcetrayWeighter(unittest.TestCase): - def test_genie_icetray(self): - nevents = 100000 - coszen = 0.7 - pdgid = 12 - c1 = CircleInjector(300, 0, 1) - p1 = PowerLaw(0, 1e3, 1e4) +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +def test_genie_icetray(nfiles, flux): + nevents = 100000 + coszen = 0.7 + pdgid = 12 + c1 = CircleInjector(300, 0, 1) + p1 = PowerLaw(0, 1e3, 1e4) + + t1 = make_new_table(pdgid, nevents, c1, p1, coszen) + + mcwd = pd.DataFrame(t1[0]) + grd = pd.DataFrame(t1[1]) + + f1 = {"I3MCWeightDict": mcwd, "I3GENIEResultDict": grd} + + wf = GenieWeighter(f1, nfiles=nfiles) + w1 = wf.get_weights(flux) + w2 = flux * p1.integral * c1.etendue / (0.7 * nfiles) + np.testing.assert_allclose(w1.sum(), w2) + E = mcwd["PrimaryNeutrinoEnergy"] + y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / (0.7 * nfiles), 6e-3) - t1 = make_new_table(pdgid, nevents, c1, p1, coszen) - mcwd = pd.DataFrame(t1[0]) - grd = pd.DataFrame(t1[1]) +def test_empty(): + with pytest.raises(RuntimeError): + x = {"I3MCWeightDict": {key: [] for key in mcwd_keys}, "I3GENIEResultDict": {key: [] for key in grd_keys}} + GenieWeighter(x, nfiles=1) - f1 = 
{"I3MCWeightDict": mcwd, "I3GENIEResultDict": grd} + with pytest.raises(RuntimeError): + x = {"I3MCWeightDict": {key: [1] for key in mcwd_keys}, "I3GENIEResultDict": {key: [1] for key in grd_keys}} + GenieWeighter(x) + + +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +@pytest.mark.parametrize("nevents", (10000, 100000, 1000000)) +def test_genie_icetray_i3files(nfiles, flux, nevents): + coszen = 0.7 + pdgid = 12 + energy = 5e3 + c1 = CircleInjector(300, 0, 1) + p1 = PowerLaw(0, 1e3, 1e4) + + weight = dataclasses.I3MapStringDouble() + weight["NEvents"] = nevents + weight["MinZenith"] = np.arccos(c1.cos_zen_max) + weight["MaxZenith"] = np.arccos(c1.cos_zen_min) + weight["PowerLawIndex"] = -1 * p1.g + weight["MinEnergyLog"] = np.log10(p1.a) + weight["MaxEnergyLog"] = np.log10(p1.b) + weight["InjectionSurfaceR"] = c1.radius + weight["GeneratorVolume"] = 1.0 + weight["PrimaryNeutrinoEnergy"] = energy - for nfiles in [1, 10, 100]: - wf = GenieWeighter(f1, nfiles=nfiles) - for flux in [1e-6, 1, 1e6]: - w1 = wf.get_weights(flux) - w2 = flux * p1.integral * c1.etendue / (0.7 * nfiles) - np.testing.assert_allclose(w1.sum(), w2) - E = mcwd["PrimaryNeutrinoEnergy"] - y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / (0.7 * nfiles), 6e-3) + resultdict = simclasses.I3GENIEResultDict() + resultdict.neu = pdgid + resultdict.pxv = 1 + resultdict.pyv = 1 + resultdict.pzv = -coszen + resultdict.Ev = energy + resultdict.wght = 1.0 + resultdict._glbprbscale = 1.0 - def test_empty(self): - with self.assertRaises(RuntimeError): - x = {"I3MCWeightDict": {key: [] for key in mcwd_keys}, "I3GENIEResultDict": {key: [] for key in grd_keys}} - GenieWeighter(x, nfiles=1) + frame = icetray.I3Frame() + frame["I3MCWeightDict"] = weight + frame["I3GENIEResultDict"] = resultdict - with self.assertRaises(RuntimeError): - x = {"I3MCWeightDict": {key: 
[1] for key in mcwd_keys}, "I3GENIEResultDict": {key: [1] for key in grd_keys}} - GenieWeighter(x) + w1 = GenieWeighter(frame, nfiles=nfiles).get_weights(flux) + w2 = flux / c1.pdf(coszen) / p1.pdf(energy) / (0.7 * nfiles * nevents) + assert w1 == approx(w2) if __name__ == "__main__": - unittest.main() + sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]])) diff --git a/tests/test_genie_weighter.py b/tests/test_genie_weighter.py index c2394a2..1cd77f1 100755 --- a/tests/test_genie_weighter.py +++ b/tests/test_genie_weighter.py @@ -4,12 +4,18 @@ # # SPDX-License-Identifier: BSD-2-Clause -import unittest +import contextlib +import sys import numpy as np +import pytest +from pytest import approx import simweights +with contextlib.suppress(ImportError): + from icecube import dataclasses, icetray, simclasses + info_dtype = [ ("primary_type", np.int32), ("n_flux_events", np.int32), @@ -25,67 +31,103 @@ result_dtype = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)] -class TestGenieWeighter(unittest.TestCase): - def test_genie(self): - nevents = 10000 - coszen = 0.7 - pdgid = 12 - c1 = simweights.CircleInjector(300, 0, 1) - p1 = simweights.PowerLaw(0, 1e3, 1e4) - - for event_weight in [1e-6, 1e-3, 1]: - for nfiles in [1, 5, 50]: - for include_volscale in [True, False]: - result_dtype = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)] - if include_volscale: - result_dtype.append(("volscale", np.float64)) - - weight = np.zeros(nevents, dtype=result_dtype) - weight["neu"] = pdgid - weight["pzv"] = coszen - weight["Ev"] = p1.ppf(np.linspace(0, 1, nevents)) - weight["wght"] = event_weight - - if include_volscale: - weight["volscale"] = 1 - - rows = nfiles * [ - ( - pdgid, - nevents, - 1, - c1.radius, - np.arccos(c1.cos_zen_max), - np.arccos(c1.cos_zen_min), - p1.a, - p1.b, - p1.g, - ), - ] - info = np.array(rows, dtype=info_dtype) - d = {"I3GenieResult": weight, "I3GenieInfo": info} - - for flux in [0.1, 1, 
10]: - wobj = simweights.GenieWeighter(d) - w = wobj.get_weights(flux) - np.testing.assert_allclose( - w.sum(), - flux * event_weight * c1.etendue * p1.integral / nfiles, - ) - E = d["I3GenieResult"]["Ev"] - y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, flux * event_weight * Ewidth * c1.etendue / nfiles, 5e-3) - - with self.assertRaises(RuntimeError): - simweights.GenieWeighter(d, nfiles=10) - - with self.assertRaises(TypeError): - simweights.GenieWeighter({"I3CorsikaWeight": weight}) - - with self.assertRaises(KeyError): - simweights.GenieWeighter({"I3GenieResult": weight}) +@pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1)) +@pytest.mark.parametrize("nfiles", (1, 5, 50)) +@pytest.mark.parametrize("include_volscale", (True, False)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_genie_reader_weighter(event_weight, nfiles, include_volscale, flux): + nevents = 10000 + coszen = 0.7 + pdgid = 12 + c1 = simweights.CircleInjector(300, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + + result_dtype = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)] + if include_volscale: + result_dtype.append(("volscale", np.float64)) + + weight = np.zeros(nevents, dtype=result_dtype) + weight["neu"] = pdgid + weight["pzv"] = coszen + weight["Ev"] = p1.ppf(np.linspace(0, 1, nevents)) + weight["wght"] = event_weight + + if include_volscale: + weight["volscale"] = 1 + + rows = nfiles * [ + ( + pdgid, + nevents, + 1, + c1.radius, + np.arccos(c1.cos_zen_max), + np.arccos(c1.cos_zen_min), + p1.a, + p1.b, + p1.g, + ), + ] + info = np.array(rows, dtype=info_dtype) + d = {"I3GenieResult": weight, "I3GenieInfo": info} + + wobj = simweights.GenieWeighter(d) + w = wobj.get_weights(flux) + np.testing.assert_allclose( + w.sum(), + flux * event_weight * c1.etendue * p1.integral / nfiles, + ) + E = d["I3GenieResult"]["Ev"] + y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, 
p1.b]) + Ewidth = np.ediff1d(x) + np.testing.assert_allclose(y, flux * event_weight * Ewidth * c1.etendue / nfiles, 5e-3) + + with pytest.raises(RuntimeError): + simweights.GenieWeighter(d, nfiles=10) + + with pytest.raises(TypeError): + simweights.GenieWeighter({"I3CorsikaWeight": weight}) + + with pytest.raises(KeyError): + simweights.GenieWeighter({"I3GenieResult": weight}) + + +@pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1)) +@pytest.mark.parametrize("volscale", (1, 2, 3)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_genie_reader_weighter_i3file(event_weight, volscale, flux): + nevents = 10000 + coszen = 0.7 + pdgid = 12 + energy = 5e3 + c1 = simweights.CircleInjector(300, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + + weight = simclasses.I3GenieResult() + weight.neu = pdgid + weight.pzv = coszen + weight.Ev = energy + weight.wght = event_weight + weight.volscale = volscale + + info = simclasses.I3GenieInfo() + info.primary_type = dataclasses.I3Particle.ParticleType(pdgid) + info.n_flux_events = nevents + info.global_probability_scale = 1 + info.cylinder_radius = c1.radius + info.min_zenith = np.arccos(c1.cos_zen_max) + info.max_zenith = np.arccos(c1.cos_zen_min) + info.min_energy = p1.a + info.max_energy = p1.b + info.power_law_index = p1.g + + frame = icetray.I3Frame() + frame["I3GenieResult"] = weight + frame["I3GenieInfo"] = info + + w = simweights.GenieWeighter(frame).get_weights(flux) + assert w == approx(flux * volscale * event_weight / c1.pdf(coszen) / p1.pdf(energy) / nevents) if __name__ == "__main__": - unittest.main() + sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]])) From 3179a94afcddc0cd0b5b68233d3c4da59dc1c75d Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:34:25 -0600 Subject: [PATCH 15/20] only run i3tests in i3 env --- tests/test_genie_icetray_weighter.py | 1 + tests/test_genie_weighter.py | 1 + 2 files changed, 2 insertions(+) diff --git 
a/tests/test_genie_icetray_weighter.py b/tests/test_genie_icetray_weighter.py index 75670a8..30f0c5f 100755 --- a/tests/test_genie_icetray_weighter.py +++ b/tests/test_genie_icetray_weighter.py @@ -97,6 +97,7 @@ def test_empty(): @pytest.mark.parametrize("nfiles", (1, 10, 100)) @pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) @pytest.mark.parametrize("nevents", (10000, 100000, 1000000)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") def test_genie_icetray_i3files(nfiles, flux, nevents): coszen = 0.7 pdgid = 12 diff --git a/tests/test_genie_weighter.py b/tests/test_genie_weighter.py index 1cd77f1..75e4129 100755 --- a/tests/test_genie_weighter.py +++ b/tests/test_genie_weighter.py @@ -95,6 +95,7 @@ def test_genie_reader_weighter(event_weight, nfiles, include_volscale, flux): @pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1)) @pytest.mark.parametrize("volscale", (1, 2, 3)) @pytest.mark.parametrize("flux", (0.1, 1, 10)) +@pytest.mark.skipif("icetray" not in globals(), reason="Not in an IceTray environment") def test_genie_reader_weighter_i3file(event_weight, volscale, flux): nevents = 10000 coszen = 0.7 From a1ff7e6d3d173fba920b36af760b369017c7e679 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Thu, 16 Jan 2025 05:59:40 -0600 Subject: [PATCH 16/20] add example --- examples/triggered_corsika_i3file.py | 44 ++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/triggered_corsika_i3file.py diff --git a/examples/triggered_corsika_i3file.py b/examples/triggered_corsika_i3file.py new file mode 100644 index 0000000..b38be6a --- /dev/null +++ b/examples/triggered_corsika_i3file.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: © 2025 the SimWeights contributors +# +# SPDX-License-Identifier: BSD-2-Clause + +from pathlib import Path +from collections import defaultdict +import numpy as np + +from simweights import 
GaisserH3a, CorsikaWeighter +from icecube import dataclasses,icetray,hdfwriter, simclasses + +FILE_DIR = Path("/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/21889/0000000-0000999") +files = sorted(str(f) for f in FILE_DIR.glob("Level2_IC86.2016_corsika.021889.000000.i3.zst")) + + +class Weighter(icetray.I3Module): + def __init__(self, context): + icetray.I3Module.__init__(self, context) + self.weighter = None + self.fluxmodel = GaisserH3a() + self.s_frames = defaultdict(int) + + def Simulation(self,frame): + pdgid = frame['I3PrimaryInjectorInfo'].primary_type + self.s_frames[pdgid]+=1 + + def DAQ(self, frame): + weighter = CorsikaWeighter(frame) + weight = weighter.get_weights(self.fluxmodel)[0] + name = f"weight_{self.fluxmodel.__class__.__name__}" + frame[name] = dataclasses.I3Double(weight) + self.PushFrame(frame) + + def Finish(self): + print("Weights Need to be adjusted by the following factors:") + for pdgid, counts in self.s_frames.items(): + print(f"{pdgid!s:>11} {counts}") + +tray = icetray.I3Tray() +tray.Add("I3Reader", FileNameList=files) +tray.Add(Weighter) +tray.Execute() From 36c3392e5f68dd2d28708fd38bae4a86601d3a7a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 Jan 2025 12:00:13 +0000 Subject: [PATCH 17/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/triggered_corsika_i3file.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/triggered_corsika_i3file.py b/examples/triggered_corsika_i3file.py index b38be6a..7ab65f1 100644 --- a/examples/triggered_corsika_i3file.py +++ b/examples/triggered_corsika_i3file.py @@ -4,12 +4,13 @@ # # SPDX-License-Identifier: BSD-2-Clause -from pathlib import Path from collections import defaultdict +from pathlib import Path + import numpy as np +from icecube import dataclasses, hdfwriter, icetray, simclasses -from simweights import 
GaisserH3a, CorsikaWeighter -from icecube import dataclasses,icetray,hdfwriter, simclasses +from simweights import CorsikaWeighter, GaisserH3a FILE_DIR = Path("/data/sim/IceCube/2016/filtered/level2/CORSIKA-in-ice/21889/0000000-0000999") files = sorted(str(f) for f in FILE_DIR.glob("Level2_IC86.2016_corsika.021889.000000.i3.zst")) @@ -22,9 +23,9 @@ def __init__(self, context): self.fluxmodel = GaisserH3a() self.s_frames = defaultdict(int) - def Simulation(self,frame): - pdgid = frame['I3PrimaryInjectorInfo'].primary_type - self.s_frames[pdgid]+=1 + def Simulation(self, frame): + pdgid = frame["I3PrimaryInjectorInfo"].primary_type + self.s_frames[pdgid] += 1 def DAQ(self, frame): weighter = CorsikaWeighter(frame) @@ -38,6 +39,7 @@ def Finish(self): for pdgid, counts in self.s_frames.items(): print(f"{pdgid!s:>11} {counts}") + tray = icetray.I3Tray() tray.Add("I3Reader", FileNameList=files) tray.Add(Weighter) From 6509e200881cb6c45b102702da7824b2045d6bd1 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Thu, 16 Jan 2025 06:10:28 -0600 Subject: [PATCH 18/20] add example --- examples/triggered_corsika_i3file.py | 8 ++++---- pyproject.toml | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/triggered_corsika_i3file.py b/examples/triggered_corsika_i3file.py index 7ab65f1..4c0d81a 100644 --- a/examples/triggered_corsika_i3file.py +++ b/examples/triggered_corsika_i3file.py @@ -17,24 +17,24 @@ class Weighter(icetray.I3Module): - def __init__(self, context): + def __init__(self, context: icetray.I3Context) -> None: icetray.I3Module.__init__(self, context) self.weighter = None self.fluxmodel = GaisserH3a() self.s_frames = defaultdict(int) - def Simulation(self, frame): + def Simulation(self, frame: icetray.I3Frame) -> None: pdgid = frame["I3PrimaryInjectorInfo"].primary_type self.s_frames[pdgid] += 1 - def DAQ(self, frame): + def DAQ(self, frame: icetray.I3Frame) -> None: weighter = 
CorsikaWeighter(frame) weight = weighter.get_weights(self.fluxmodel)[0] name = f"weight_{self.fluxmodel.__class__.__name__}" frame[name] = dataclasses.I3Double(weight) self.PushFrame(frame) - def Finish(self): + def Finish(self) -> None: print("Weights Need to be adjusted by the following factors:") for pdgid, counts in self.s_frames.items(): print(f"{pdgid!s:>11} {counts}") diff --git a/pyproject.toml b/pyproject.toml index 40139c5..fcb2be2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,7 +110,8 @@ select = ["ALL"] "D", # pydocstyle "F401", # unused-import "T201", # flake8-print - "PLC0206" # Extracting value from dictionary without calling `.items()` + "PLC0206", # Extracting value from dictionary without calling `.items()` + "N802" # Function name should be lowercase ] "tests/*" = [ "D", # pydocstyle From c141ddb6e1d4de930191a582b6fa2608039192de Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Thu, 16 Jan 2025 06:17:36 -0600 Subject: [PATCH 19/20] add executable bit --- examples/triggered_corsika_i3file.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 examples/triggered_corsika_i3file.py diff --git a/examples/triggered_corsika_i3file.py b/examples/triggered_corsika_i3file.py old mode 100644 new mode 100755 From 82b7c6748342b3f416089128574a39cd33f3f62d Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Thu, 16 Jan 2025 11:45:39 -0600 Subject: [PATCH 20/20] add documentation for i3frame --- docs/i3frame_support.rst | 33 ++++++++++++++++++++++++++++ docs/index.rst | 1 + docs/reading_files.rst | 4 ++++ examples/triggered_corsika_i3file.py | 1 + src/simweights/_fluxes.py | 8 +++---- 5 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 docs/i3frame_support.rst diff --git a/docs/i3frame_support.rst b/docs/i3frame_support.rst new file mode 100644 index 0000000..da5f563 --- /dev/null +++ b/docs/i3frame_support.rst @@ 
-0,0 +1,33 @@ +.. SPDX-FileCopyrightText: © 2025 the SimWeights contributors +.. +.. SPDX-License-Identifier: BSD-2-Clause + +I3Frame Support +=============== + +You can calculate the weight for a single event directly from an ``I3Frame`` +with a number of caveats. First, weights are only meaningful for a sample of +Monte Carlo events, not a single event. The weights calculated here will not be +useful for combining different samples. Second, the normalization may be off in +subtle ways. For example, in triggered CORSIKA there will be 8 S-frames for +each primary type per file which need to be accounted for. When reading an HDF5 +file this is correctly accounted for but there is no way to account for this in +IceTray. It is strongly encouraged that you only weight events after obtaining +a complete sample in an HDF5 or similar file. But if you really need to +calculate weights in IceTray you can follow the example below: + +.. literalinclude:: ../examples/triggered_corsika_i3file.py + :start-after: start-example1 + +Note that the module keeps track of how many S-Frames there are and hence the +factor the weight is incorrect by. But, because of the serial nature of +IceTray, it can't retroactively apply that correction to events that have +already been processed. The output should look like:: + + PPlus 8 + He4Nucleus 8 + N14Nucleus 8 + Al27Nucleus 8 + Fe56Nucleus 8 + +Indicating that there were 8 S-Frames of each primary type. diff --git a/docs/index.rst b/docs/index.rst index 2b46b81..99f0f95 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -42,6 +42,7 @@ reading_files reading_nugen units + i3frame_support .. 
toctree:: :maxdepth: 1 diff --git a/docs/reading_files.rst b/docs/reading_files.rst index f51dcaa..aeda908 100644 --- a/docs/reading_files.rst +++ b/docs/reading_files.rst @@ -80,7 +80,11 @@ The table below shows the what tables simweights looks for for each type of simu +--------------------------+---------------------------+---------------------------------------------+ | CORSIKA without S-Frames | none | ``CorsikaWeightMap``, ``PolyplopiaPrimary`` | +--------------------------+---------------------------+---------------------------------------------+ +| IceTop CORSIKA | ``I3TopInjectorInfo`` | ``MCPrimary`` | ++--------------------------+---------------------------+---------------------------------------------+ | neutrino-generator | none | ``I3MCWeightDict`` | +--------------------------+---------------------------+---------------------------------------------+ | genie-reader | ``I3GenieInfo`` | ``I3GenieResult`` | +--------------------------+---------------------------+---------------------------------------------+ +| genie-icetray | none | ``I3MCWeightDict``, ``I3GENIEResultDict`` | ++--------------------------+---------------------------+---------------------------------------------+ diff --git a/examples/triggered_corsika_i3file.py b/examples/triggered_corsika_i3file.py index 4c0d81a..c597f8c 100755 --- a/examples/triggered_corsika_i3file.py +++ b/examples/triggered_corsika_i3file.py @@ -4,6 +4,7 @@ # # SPDX-License-Identifier: BSD-2-Clause +# start-example1 from collections import defaultdict from pathlib import Path diff --git a/src/simweights/_fluxes.py b/src/simweights/_fluxes.py index 9571781..437b5df 100644 --- a/src/simweights/_fluxes.py +++ b/src/simweights/_fluxes.py @@ -331,7 +331,7 @@ class GlobalFitGST_IT(CosmicRayFlux): # pylint: disable=invalid-name class GlobalSplineFitBase(CosmicRayFlux): - r"""Data-driven spline fit of the cosmic ray spectrum by Dembinski et. al. \ [#GSFDembinski]. 
+ r"""Data-driven spline fit of the cosmic ray spectrum by Dembinski et. al. \ [#GSFDembinski]_. Base class all actual classes should inherit from this one. """ @@ -350,7 +350,7 @@ def __init__(self: GlobalSplineFitBase) -> None: class GlobalSplineFit(GlobalSplineFitBase): - r"""Data-driven spline fit of the cosmic ray spectrum by Dembinski et. al. \ [#GSFDembinski].""" + r"""Data-driven spline fit of the cosmic ray spectrum by Dembinski et. al. \ [#GSFDembinski]_.""" pdgids = PDGID_ALL @@ -362,7 +362,7 @@ def __init__(self: GlobalSplineFit) -> None: class GlobalSplineFit5Comp(GlobalSplineFitBase): r"""Sum of the flux of the GSF model for the standard 5 components injected by IceCube. - GSF is a Data-driven spline fit of the cosmic ray spectrum by Dembinski et. al. \ [#GSFDembinski]. + GSF is a Data-driven spline fit of the cosmic ray spectrum by Dembinski et. al. \ [#GSFDembinski]_. """ pdgids = PDGID_5COMP @@ -376,7 +376,7 @@ class GlobalSplineFit_IT(GlobalSplineFitBase): # pylint: disable=invalid-name r"""Sum of the flux of the GSF model for the standard 4 components injected by IceCube. [(H), (He), (Li, Be, B, C, N, O, F, Ne), (Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, Ni)] - GSF is a Data-driven spline fit of the cosmic ray spectrum by Dembinski et. al. \ [#GSFDembinski]. + GSF is a Data-driven spline fit of the cosmic ray spectrum by Dembinski et. al. \ [#GSFDembinski]_. """ pdgids = PDGID_4COMP