From 7c00c557d4afac8ec499c649ac469396f633e498 Mon Sep 17 00:00:00 2001 From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:21:48 -0600 Subject: [PATCH] tests for genie i3files --- .pre-commit-config.yaml | 2 +- src/simweights/_genie_weighter.py | 4 +- src/simweights/_nugen_weighter.py | 20 ++-- tests/test_genie_icetray_weighter.py | 105 ++++++++++++----- tests/test_genie_weighter.py | 166 +++++++++++++++++---------- 5 files changed, 192 insertions(+), 105 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fcec4b6..5f0c22f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: exclude: ^contrib/ additional_dependencies: [numpy, pandas] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.0 + rev: v0.9.1 hooks: - id: ruff args: [--fix, --show-fixes] diff --git a/src/simweights/_genie_weighter.py b/src/simweights/_genie_weighter.py index 1c92a6e..a07c3ac 100644 --- a/src/simweights/_genie_weighter.py +++ b/src/simweights/_genie_weighter.py @@ -46,8 +46,8 @@ def genie_icetray_surface( (pid, _, _, _, _) = row mask = np.all(gen_schemes == row[None, :], axis=1) - spatial = nugen_spatial(mcweightdict[mask]) - spectrum = nugen_spectrum(mcweightdict[mask]) + spatial = nugen_spatial(mcweightdict, mask) + spectrum = nugen_spectrum(mcweightdict, mask) type_weight = nufraction if pid > 0 else 1 - nufraction n_events = type_weight * constcol(mcweightdict, "NEvents", mask) diff --git a/src/simweights/_nugen_weighter.py b/src/simweights/_nugen_weighter.py index 299b94e..9f53ba6 100644 --- a/src/simweights/_nugen_weighter.py +++ b/src/simweights/_nugen_weighter.py @@ -14,20 +14,20 @@ from ._weighter import Weighter -def nugen_spatial(table: Any) -> SpatialDist: +def nugen_spatial(table: Any, mask: Any = None) -> SpatialDist: """Inspect the ``I3MCWeightDict`` table of a nugen file to generate the spatial distribution. It will either return a CircleInjector or UniformSolidAngleCylinder depending on how the dataset was generated. """ - max_cos = np.cos(constcol(table, "MinZenith")) - min_cos = np.cos(constcol(table, "MaxZenith")) + max_cos = np.cos(constcol(table, "MinZenith", mask)) + min_cos = np.cos(constcol(table, "MaxZenith", mask)) # Before V04-01-00, nugen injection primaries on the surface of a circle perpendicular to the momentum # vector of the primary, this can be determined by checking `InjectionSurfaceR`. It will # be > 0 for circle injection and -1 for surface injection. In new versions >V6-00-00 it is not even # present indicating surface mode - injection_radius = constcol(table, "InjectionSurfaceR") if has_column(table, "InjectionSurfaceR") else -1 + injection_radius = constcol(table, "InjectionSurfaceR", mask) if has_column(table, "InjectionSurfaceR") else -1 if injection_radius > 0: return CircleInjector(injection_radius, min_cos, max_cos, "cos_zen") @@ -35,18 +35,18 @@ def nugen_spatial(table: Any) -> SpatialDist: # Surface mode was added in V04-01-00 but the cylinder size was hard coded, `CylinderHeight` and # `CylinderRadius` were added after later V06-00-00. If they are not in the table then use the # hardcoded values - cylinder_height = constcol(table, "CylinderHeight") if has_column(table, "CylinderHeight") else 1900 - cylinder_radius = constcol(table, "CylinderRadius") if has_column(table, "CylinderRadius") else 950 + cylinder_height = constcol(table, "CylinderHeight", mask) if has_column(table, "CylinderHeight") else 1900 + cylinder_radius = constcol(table, "CylinderRadius", mask) if has_column(table, "CylinderRadius") else 950 return UniformSolidAngleCylinder(cylinder_height, cylinder_radius, min_cos, max_cos, "cos_zen") -def nugen_spectrum(table: Any) -> PowerLaw: +def nugen_spectrum(table: Any, mask: Any = None) -> PowerLaw: """Inspect the ``I3MCWeightDict`` table of a nugen file to generate to represent the energy spectrum.""" - min_energy = 10 ** constcol(table, "MinEnergyLog") - max_energy = 10 ** constcol(table, "MaxEnergyLog") + min_energy = 10 ** constcol(table, "MinEnergyLog", mask) + max_energy = 10 ** constcol(table, "MaxEnergyLog", mask) # the energy spectrum is always powerlaw however nugen uses positive value of `PowerLawIndex` # for negative slopes ie +2 means E**-2 injection spectrum - power_law_index = -constcol(table, "PowerLawIndex") + power_law_index = -constcol(table, "PowerLawIndex", mask) assert power_law_index <= 0 return PowerLaw(power_law_index, min_energy, max_energy, "energy") diff --git a/tests/test_genie_icetray_weighter.py b/tests/test_genie_icetray_weighter.py index a318c0b..75670a8 100755 --- a/tests/test_genie_icetray_weighter.py +++ b/tests/test_genie_icetray_weighter.py @@ -4,13 +4,19 @@ # # SPDX-License-Identifier: BSD-2-Clause -import unittest +import contextlib +import sys import numpy as np import pandas as pd +import pytest +from pytest import approx from simweights import CircleInjector, GenieWeighter, PowerLaw +with contextlib.suppress(ImportError): + from icecube import dataclasses, icetray, simclasses + mcwd_keys = [ "NEvents", "MinZenith", @@ -52,41 +58,80 @@ def make_new_table(pdgid, nevents, spatial, spectrum, coszen): return weight, resultdict -class TestGenieIcetrayWeighter(unittest.TestCase): - def test_genie_icetray(self): - nevents = 100000 - coszen = 0.7 - pdgid = 12 - c1 = CircleInjector(300, 0, 1) - p1 = PowerLaw(0, 1e3, 1e4) +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +def test_genie_icetray(nfiles, flux): + nevents = 100000 + coszen = 0.7 + pdgid = 12 + c1 = CircleInjector(300, 0, 1) + p1 = PowerLaw(0, 1e3, 1e4) + + t1 = make_new_table(pdgid, nevents, c1, p1, coszen) + + mcwd = pd.DataFrame(t1[0]) + grd = pd.DataFrame(t1[1]) + + f1 = {"I3MCWeightDict": mcwd, "I3GENIEResultDict": grd} + + wf = GenieWeighter(f1, nfiles=nfiles) + w1 = wf.get_weights(flux) + w2 = flux * p1.integral * c1.etendue / (0.7 * nfiles) + np.testing.assert_allclose(w1.sum(), w2) + E = mcwd["PrimaryNeutrinoEnergy"] + y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / (0.7 * nfiles), 6e-3) - t1 = make_new_table(pdgid, nevents, c1, p1, coszen) - mcwd = pd.DataFrame(t1[0]) - grd = pd.DataFrame(t1[1]) +def test_empty(): + with pytest.raises(RuntimeError): + x = {"I3MCWeightDict": {key: [] for key in mcwd_keys}, "I3GENIEResultDict": {key: [] for key in grd_keys}} + GenieWeighter(x, nfiles=1) - f1 = {"I3MCWeightDict": mcwd, "I3GENIEResultDict": grd} + with pytest.raises(RuntimeError): + x = {"I3MCWeightDict": {key: [1] for key in mcwd_keys}, "I3GENIEResultDict": {key: [1] for key in grd_keys}} + GenieWeighter(x) + + +@pytest.mark.parametrize("nfiles", (1, 10, 100)) +@pytest.mark.parametrize("flux", (1e-6, 1, 1e6)) +@pytest.mark.parametrize("nevents", (10000, 100000, 1000000)) +def test_genie_icetray_i3files(nfiles, flux, nevents): + coszen = 0.7 + pdgid = 12 + energy = 5e3 + c1 = CircleInjector(300, 0, 1) + p1 = PowerLaw(0, 1e3, 1e4) + + weight = dataclasses.I3MapStringDouble() + weight["NEvents"] = nevents + weight["MinZenith"] = np.arccos(c1.cos_zen_max) + weight["MaxZenith"] = np.arccos(c1.cos_zen_min) + weight["PowerLawIndex"] = -1 * p1.g + weight["MinEnergyLog"] = np.log10(p1.a) + weight["MaxEnergyLog"] = np.log10(p1.b) + weight["InjectionSurfaceR"] = c1.radius + weight["GeneratorVolume"] = 1.0 + weight["PrimaryNeutrinoEnergy"] = energy - for nfiles in [1, 10, 100]: - wf = GenieWeighter(f1, nfiles=nfiles) - for flux in [1e-6, 1, 1e6]: - w1 = wf.get_weights(flux) - w2 = flux * p1.integral * c1.etendue / (0.7 * nfiles) - np.testing.assert_allclose(w1.sum(), w2) - E = mcwd["PrimaryNeutrinoEnergy"] - y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / (0.7 * nfiles), 6e-3) + resultdict = simclasses.I3GENIEResultDict() + resultdict.neu = pdgid + resultdict.pxv = 1 + resultdict.pyv = 1 + resultdict.pzv = -coszen + resultdict.Ev = energy + resultdict.wght = 1.0 + resultdict._glbprbscale = 1.0 - def test_empty(self): - with self.assertRaises(RuntimeError): - x = {"I3MCWeightDict": {key: [] for key in mcwd_keys}, "I3GENIEResultDict": {key: [] for key in grd_keys}} - GenieWeighter(x, nfiles=1) + frame = icetray.I3Frame() + frame["I3MCWeightDict"] = weight + frame["I3GENIEResultDict"] = resultdict - with self.assertRaises(RuntimeError): - x = {"I3MCWeightDict": {key: [1] for key in mcwd_keys}, "I3GENIEResultDict": {key: [1] for key in grd_keys}} - GenieWeighter(x) + w1 = GenieWeighter(frame, nfiles=nfiles).get_weights(flux) + w2 = flux / c1.pdf(coszen) / p1.pdf(energy) / (0.7 * nfiles * nevents) + assert w1 == approx(w2) if __name__ == "__main__": - unittest.main() + sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]])) diff --git a/tests/test_genie_weighter.py b/tests/test_genie_weighter.py index c2394a2..1cd77f1 100755 --- a/tests/test_genie_weighter.py +++ b/tests/test_genie_weighter.py @@ -4,12 +4,18 @@ # # SPDX-License-Identifier: BSD-2-Clause -import unittest +import contextlib +import sys import numpy as np +import pytest +from pytest import approx import simweights +with contextlib.suppress(ImportError): + from icecube import dataclasses, icetray, simclasses + info_dtype = [ ("primary_type", np.int32), ("n_flux_events", np.int32), @@ -25,67 +31,103 @@ result_dtype = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)] -class TestGenieWeighter(unittest.TestCase): - def test_genie(self): - nevents = 10000 - coszen = 0.7 - pdgid = 12 - c1 = simweights.CircleInjector(300, 0, 1) - p1 = simweights.PowerLaw(0, 1e3, 1e4) - - for event_weight in [1e-6, 1e-3, 1]: - for nfiles in [1, 5, 50]: - for include_volscale in [True, False]: - result_dtype = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)] - if include_volscale: - result_dtype.append(("volscale", np.float64)) - - weight = np.zeros(nevents, dtype=result_dtype) - weight["neu"] = pdgid - weight["pzv"] = coszen - weight["Ev"] = p1.ppf(np.linspace(0, 1, nevents)) - weight["wght"] = event_weight - - if include_volscale: - weight["volscale"] = 1 - - rows = nfiles * [ - ( - pdgid, - nevents, - 1, - c1.radius, - np.arccos(c1.cos_zen_max), - np.arccos(c1.cos_zen_min), - p1.a, - p1.b, - p1.g, - ), - ] - info = np.array(rows, dtype=info_dtype) - d = {"I3GenieResult": weight, "I3GenieInfo": info} - - for flux in [0.1, 1, 10]: - wobj = simweights.GenieWeighter(d) - w = wobj.get_weights(flux) - np.testing.assert_allclose( - w.sum(), - flux * event_weight * c1.etendue * p1.integral / nfiles, - ) - E = d["I3GenieResult"]["Ev"] - y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) - Ewidth = np.ediff1d(x) - np.testing.assert_allclose(y, flux * event_weight * Ewidth * c1.etendue / nfiles, 5e-3) - - with self.assertRaises(RuntimeError): - simweights.GenieWeighter(d, nfiles=10) - - with self.assertRaises(TypeError): - simweights.GenieWeighter({"I3CorsikaWeight": weight}) - - with self.assertRaises(KeyError): - simweights.GenieWeighter({"I3GenieResult": weight}) +@pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1)) +@pytest.mark.parametrize("nfiles", (1, 5, 50)) +@pytest.mark.parametrize("include_volscale", (True, False)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_genie_reader_weighter(event_weight, nfiles, include_volscale, flux): + nevents = 10000 + coszen = 0.7 + pdgid = 12 + c1 = simweights.CircleInjector(300, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + + result_dtype = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)] + if include_volscale: + result_dtype.append(("volscale", np.float64)) + + weight = np.zeros(nevents, dtype=result_dtype) + weight["neu"] = pdgid + weight["pzv"] = coszen + weight["Ev"] = p1.ppf(np.linspace(0, 1, nevents)) + weight["wght"] = event_weight + + if include_volscale: + weight["volscale"] = 1 + + rows = nfiles * [ + ( + pdgid, + nevents, + 1, + c1.radius, + np.arccos(c1.cos_zen_max), + np.arccos(c1.cos_zen_min), + p1.a, + p1.b, + p1.g, + ), + ] + info = np.array(rows, dtype=info_dtype) + d = {"I3GenieResult": weight, "I3GenieInfo": info} + + wobj = simweights.GenieWeighter(d) + w = wobj.get_weights(flux) + np.testing.assert_allclose( + w.sum(), + flux * event_weight * c1.etendue * p1.integral / nfiles, + ) + E = d["I3GenieResult"]["Ev"] + y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b]) + Ewidth = np.ediff1d(x) + np.testing.assert_allclose(y, flux * event_weight * Ewidth * c1.etendue / nfiles, 5e-3) + + with pytest.raises(RuntimeError): + simweights.GenieWeighter(d, nfiles=10) + + with pytest.raises(TypeError): + simweights.GenieWeighter({"I3CorsikaWeight": weight}) + + with pytest.raises(KeyError): + simweights.GenieWeighter({"I3GenieResult": weight}) + + +@pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1)) +@pytest.mark.parametrize("volscale", (1, 2, 3)) +@pytest.mark.parametrize("flux", (0.1, 1, 10)) +def test_genie_reader_weighter_i3file(event_weight, volscale, flux): + nevents = 10000 + coszen = 0.7 + pdgid = 12 + energy = 5e3 + c1 = simweights.CircleInjector(300, 0, 1) + p1 = simweights.PowerLaw(0, 1e3, 1e4) + + weight = simclasses.I3GenieResult() + weight.neu = pdgid + weight.pzv = coszen + weight.Ev = energy + weight.wght = event_weight + weight.volscale = volscale + + info = simclasses.I3GenieInfo() + info.primary_type = dataclasses.I3Particle.ParticleType(pdgid) + info.n_flux_events = nevents + info.global_probability_scale = 1 + info.cylinder_radius = c1.radius + info.min_zenith = np.arccos(c1.cos_zen_max) + info.max_zenith = np.arccos(c1.cos_zen_min) + info.min_energy = p1.a + info.max_energy = p1.b + info.power_law_index = p1.g + + frame = icetray.I3Frame() + frame["I3GenieResult"] = weight + frame["I3GenieInfo"] = info + + w = simweights.GenieWeighter(frame).get_weights(flux) + assert w == approx(flux * volscale * event_weight / c1.pdf(coszen) / p1.pdf(energy) / nevents) if __name__ == "__main__": - unittest.main() + sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]]))