From 7c00c557d4afac8ec499c649ac469396f633e498 Mon Sep 17 00:00:00 2001
From: Kevin Meagher <11620178+kjmeagher@users.noreply.github.com>
Date: Wed, 15 Jan 2025 15:21:48 -0600
Subject: [PATCH] Add GENIE i3file tests; pass row mask to nugen_spatial/nugen_spectrum

---
 .pre-commit-config.yaml              |   2 +-
 src/simweights/_genie_weighter.py    |   4 +-
 src/simweights/_nugen_weighter.py    |  20 ++--
 tests/test_genie_icetray_weighter.py | 105 ++++++++++++-----
 tests/test_genie_weighter.py         | 166 +++++++++++++++++----------
 5 files changed, 192 insertions(+), 105 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fcec4b6..5f0c22f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -43,7 +43,7 @@ repos:
         exclude: ^contrib/
         additional_dependencies: [numpy, pandas]
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.0
+    rev: v0.9.1
     hooks:
       - id: ruff
         args: [--fix, --show-fixes]
diff --git a/src/simweights/_genie_weighter.py b/src/simweights/_genie_weighter.py
index 1c92a6e..a07c3ac 100644
--- a/src/simweights/_genie_weighter.py
+++ b/src/simweights/_genie_weighter.py
@@ -46,8 +46,8 @@ def genie_icetray_surface(
         (pid, _, _, _, _) = row
         mask = np.all(gen_schemes == row[None, :], axis=1)
 
-        spatial = nugen_spatial(mcweightdict[mask])
-        spectrum = nugen_spectrum(mcweightdict[mask])
+        spatial = nugen_spatial(mcweightdict, mask)
+        spectrum = nugen_spectrum(mcweightdict, mask)
 
         type_weight = nufraction if pid > 0 else 1 - nufraction
         n_events = type_weight * constcol(mcweightdict, "NEvents", mask)
diff --git a/src/simweights/_nugen_weighter.py b/src/simweights/_nugen_weighter.py
index 299b94e..9f53ba6 100644
--- a/src/simweights/_nugen_weighter.py
+++ b/src/simweights/_nugen_weighter.py
@@ -14,20 +14,20 @@
 from ._weighter import Weighter
 
 
-def nugen_spatial(table: Any) -> SpatialDist:
+def nugen_spatial(table: Any, mask: Any = None) -> SpatialDist:
     """Inspect the ``I3MCWeightDict`` table of a nugen file to generate the spatial distribution.
 
     It will either return a CircleInjector or UniformSolidAngleCylinder
     depending on how the dataset was generated.
     """
-    max_cos = np.cos(constcol(table, "MinZenith"))
-    min_cos = np.cos(constcol(table, "MaxZenith"))
+    max_cos = np.cos(constcol(table, "MinZenith", mask))
+    min_cos = np.cos(constcol(table, "MaxZenith", mask))
 
     # Before V04-01-00, nugen injection primaries on the surface of a circle perpendicular to the momentum
     # vector of the primary, this can be determined by checking `InjectionSurfaceR`. It will
     # be > 0 for circle injection and -1 for surface injection. In new versions >V6-00-00 it is not even
     # present indicating surface mode
-    injection_radius = constcol(table, "InjectionSurfaceR") if has_column(table, "InjectionSurfaceR") else -1
+    injection_radius = constcol(table, "InjectionSurfaceR", mask) if has_column(table, "InjectionSurfaceR") else -1
 
     if injection_radius > 0:
         return CircleInjector(injection_radius, min_cos, max_cos, "cos_zen")
@@ -35,18 +35,18 @@ def nugen_spatial(table: Any) -> SpatialDist:
     # Surface mode was added in V04-01-00 but the cylinder size was hard coded, `CylinderHeight` and
     # `CylinderRadius` were added after later V06-00-00. If they are not in the table then use the
     # hardcoded values
-    cylinder_height = constcol(table, "CylinderHeight") if has_column(table, "CylinderHeight") else 1900
-    cylinder_radius = constcol(table, "CylinderRadius") if has_column(table, "CylinderRadius") else 950
+    cylinder_height = constcol(table, "CylinderHeight", mask) if has_column(table, "CylinderHeight") else 1900
+    cylinder_radius = constcol(table, "CylinderRadius", mask) if has_column(table, "CylinderRadius") else 950
     return UniformSolidAngleCylinder(cylinder_height, cylinder_radius, min_cos, max_cos, "cos_zen")
 
 
-def nugen_spectrum(table: Any) -> PowerLaw:
+def nugen_spectrum(table: Any, mask: Any = None) -> PowerLaw:
     """Inspect the ``I3MCWeightDict`` table of a nugen file to generate to represent the energy spectrum."""
-    min_energy = 10 ** constcol(table, "MinEnergyLog")
-    max_energy = 10 ** constcol(table, "MaxEnergyLog")
+    min_energy = 10 ** constcol(table, "MinEnergyLog", mask)
+    max_energy = 10 ** constcol(table, "MaxEnergyLog", mask)
     # the energy spectrum is always powerlaw however nugen uses positive value of `PowerLawIndex`
     # for negative slopes ie +2 means E**-2 injection spectrum
-    power_law_index = -constcol(table, "PowerLawIndex")
+    power_law_index = -constcol(table, "PowerLawIndex", mask)
     assert power_law_index <= 0
     return PowerLaw(power_law_index, min_energy, max_energy, "energy")
 
diff --git a/tests/test_genie_icetray_weighter.py b/tests/test_genie_icetray_weighter.py
index a318c0b..75670a8 100755
--- a/tests/test_genie_icetray_weighter.py
+++ b/tests/test_genie_icetray_weighter.py
@@ -4,13 +4,19 @@
 #
 # SPDX-License-Identifier: BSD-2-Clause
 
-import unittest
+import contextlib
+import sys
 
 import numpy as np
 import pandas as pd
+import pytest
+from pytest import approx
 
 from simweights import CircleInjector, GenieWeighter, PowerLaw
 
+with contextlib.suppress(ImportError):
+    from icecube import dataclasses, icetray, simclasses
+
 mcwd_keys = [
     "NEvents",
     "MinZenith",
@@ -52,41 +58,80 @@ def make_new_table(pdgid, nevents, spatial, spectrum, coszen):
     return weight, resultdict
 
 
-class TestGenieIcetrayWeighter(unittest.TestCase):
-    def test_genie_icetray(self):
-        nevents = 100000
-        coszen = 0.7
-        pdgid = 12
-        c1 = CircleInjector(300, 0, 1)
-        p1 = PowerLaw(0, 1e3, 1e4)
+@pytest.mark.parametrize("nfiles", (1, 10, 100))
+@pytest.mark.parametrize("flux", (1e-6, 1, 1e6))
+def test_genie_icetray(nfiles, flux):
+    """Check the summed and per-energy-bin weights of a synthetic genie-icetray table."""
+    injector = CircleInjector(300, 0, 1)
+    spectrum = PowerLaw(0, 1e3, 1e4)
+
+    # make_new_table returns the (weight, resultdict) pair, one DataFrame is built from each
+    mcwd_rows, grd_rows = make_new_table(pdgid=12, nevents=100000, spatial=injector, spectrum=spectrum, coszen=0.7)
+    mcwd = pd.DataFrame(mcwd_rows)
+    tables = {"I3MCWeightDict": mcwd, "I3GENIEResultDict": pd.DataFrame(grd_rows)}
+
+    weights = GenieWeighter(tables, nfiles=nfiles).get_weights(flux)
+
+    # both expectations below are divided by the same 0.7 * nfiles factor
+    norm = 0.7 * nfiles
+    expected_total = flux * spectrum.integral * injector.etendue / norm
+    np.testing.assert_allclose(weights.sum(), expected_total)
+
+    # histogram the weights in energy: each bin is compared with flux * bin width * etendue / norm
+    energies = mcwd["PrimaryNeutrinoEnergy"]
+    counts, edges = np.histogram(energies, weights=weights, bins=51, range=[spectrum.a, spectrum.b])
+    bin_widths = np.ediff1d(edges)
+    np.testing.assert_allclose(counts, flux * bin_widths * injector.etendue / norm, 6e-3)
 
-        t1 = make_new_table(pdgid, nevents, c1, p1, coszen)
 
-        mcwd = pd.DataFrame(t1[0])
-        grd = pd.DataFrame(t1[1])
+def test_empty():
+    empty = {"I3MCWeightDict": {key: [] for key in mcwd_keys}, "I3GENIEResultDict": {key: [] for key in grd_keys}}
+    with pytest.raises(RuntimeError):  # tables without any rows
+        GenieWeighter(empty, nfiles=1)
 
-        f1 = {"I3MCWeightDict": mcwd, "I3GENIEResultDict": grd}
+    single = {"I3MCWeightDict": {key: [1] for key in mcwd_keys}, "I3GENIEResultDict": {key: [1] for key in grd_keys}}
+    with pytest.raises(RuntimeError):  # one row per column, but no nfiles given
+        GenieWeighter(single)
+
+
+@pytest.mark.skipif("simclasses" not in globals(), reason="Not in an IceTray environment")
+@pytest.mark.parametrize("nfiles", (1, 10, 100))
+@pytest.mark.parametrize("flux", (1e-6, 1, 1e6))
+@pytest.mark.parametrize("nevents", (10000, 100000, 1000000))
+def test_genie_icetray_i3files(nfiles, flux, nevents):
+    coszen = 0.7
+    pdgid = 12
+    energy = 5e3
+    c1 = CircleInjector(300, 0, 1)
+    p1 = PowerLaw(0, 1e3, 1e4)
+    weight = dataclasses.I3MapStringDouble()
+    weight["NEvents"] = nevents
+    weight["MinZenith"] = np.arccos(c1.cos_zen_max)
+    weight["MaxZenith"] = np.arccos(c1.cos_zen_min)
+    weight["PowerLawIndex"] = -1 * p1.g  # nugen stores a positive index for a falling spectrum
+    weight["MinEnergyLog"] = np.log10(p1.a)
+    weight["MaxEnergyLog"] = np.log10(p1.b)
+    weight["InjectionSurfaceR"] = c1.radius
+    weight["GeneratorVolume"] = 1.0
+    weight["PrimaryNeutrinoEnergy"] = energy
 
-        for nfiles in [1, 10, 100]:
-            wf = GenieWeighter(f1, nfiles=nfiles)
-            for flux in [1e-6, 1, 1e6]:
-                w1 = wf.get_weights(flux)
-                w2 = flux * p1.integral * c1.etendue / (0.7 * nfiles)
-                np.testing.assert_allclose(w1.sum(), w2)
-                E = mcwd["PrimaryNeutrinoEnergy"]
-                y, x = np.histogram(E, weights=w1, bins=51, range=[p1.a, p1.b])
-                Ewidth = np.ediff1d(x)
-                np.testing.assert_allclose(y, flux * Ewidth * c1.etendue / (0.7 * nfiles), 6e-3)
+    resultdict = simclasses.I3GENIEResultDict()
+    resultdict.neu = pdgid
+    resultdict.pxv = 1
+    resultdict.pyv = 1
+    resultdict.pzv = -coszen
+    resultdict.Ev = energy
+    resultdict.wght = 1.0
+    resultdict._glbprbscale = 1.0
 
-    def test_empty(self):
-        with self.assertRaises(RuntimeError):
-            x = {"I3MCWeightDict": {key: [] for key in mcwd_keys}, "I3GENIEResultDict": {key: [] for key in grd_keys}}
-            GenieWeighter(x, nfiles=1)
+    frame = icetray.I3Frame()
+    frame["I3MCWeightDict"] = weight
+    frame["I3GENIEResultDict"] = resultdict
 
-        with self.assertRaises(RuntimeError):
-            x = {"I3MCWeightDict": {key: [1] for key in mcwd_keys}, "I3GENIEResultDict": {key: [1] for key in grd_keys}}
-            GenieWeighter(x)
+    w1 = GenieWeighter(frame, nfiles=nfiles).get_weights(flux)  # the frame holds exactly one event
+    w2 = flux / c1.pdf(coszen) / p1.pdf(energy) / (0.7 * nfiles * nevents)
+    assert w1 == approx(w2)
 
 
 if __name__ == "__main__":
-    unittest.main()
+    sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]]))
diff --git a/tests/test_genie_weighter.py b/tests/test_genie_weighter.py
index c2394a2..1cd77f1 100755
--- a/tests/test_genie_weighter.py
+++ b/tests/test_genie_weighter.py
@@ -4,12 +4,18 @@
 #
 # SPDX-License-Identifier: BSD-2-Clause
 
-import unittest
+import contextlib
+import sys
 
 import numpy as np
+import pytest
+from pytest import approx
 
 import simweights
 
+with contextlib.suppress(ImportError):
+    from icecube import dataclasses, icetray, simclasses
+
 info_dtype = [
     ("primary_type", np.int32),
     ("n_flux_events", np.int32),
@@ -25,67 +31,103 @@
 result_dtype = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)]
 
 
-class TestGenieWeighter(unittest.TestCase):
-    def test_genie(self):
-        nevents = 10000
-        coszen = 0.7
-        pdgid = 12
-        c1 = simweights.CircleInjector(300, 0, 1)
-        p1 = simweights.PowerLaw(0, 1e3, 1e4)
-
-        for event_weight in [1e-6, 1e-3, 1]:
-            for nfiles in [1, 5, 50]:
-                for include_volscale in [True, False]:
-                    result_dtype = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)]
-                    if include_volscale:
-                        result_dtype.append(("volscale", np.float64))
-
-                    weight = np.zeros(nevents, dtype=result_dtype)
-                    weight["neu"] = pdgid
-                    weight["pzv"] = coszen
-                    weight["Ev"] = p1.ppf(np.linspace(0, 1, nevents))
-                    weight["wght"] = event_weight
-
-                    if include_volscale:
-                        weight["volscale"] = 1
-
-                    rows = nfiles * [
-                        (
-                            pdgid,
-                            nevents,
-                            1,
-                            c1.radius,
-                            np.arccos(c1.cos_zen_max),
-                            np.arccos(c1.cos_zen_min),
-                            p1.a,
-                            p1.b,
-                            p1.g,
-                        ),
-                    ]
-                    info = np.array(rows, dtype=info_dtype)
-                    d = {"I3GenieResult": weight, "I3GenieInfo": info}
-
-                    for flux in [0.1, 1, 10]:
-                        wobj = simweights.GenieWeighter(d)
-                        w = wobj.get_weights(flux)
-                        np.testing.assert_allclose(
-                            w.sum(),
-                            flux * event_weight * c1.etendue * p1.integral / nfiles,
-                        )
-                        E = d["I3GenieResult"]["Ev"]
-                        y, x = np.histogram(E, weights=w, bins=51, range=[p1.a, p1.b])
-                        Ewidth = np.ediff1d(x)
-                        np.testing.assert_allclose(y, flux * event_weight * Ewidth * c1.etendue / nfiles, 5e-3)
-
-        with self.assertRaises(RuntimeError):
-            simweights.GenieWeighter(d, nfiles=10)
-
-        with self.assertRaises(TypeError):
-            simweights.GenieWeighter({"I3CorsikaWeight": weight})
-
-        with self.assertRaises(KeyError):
-            simweights.GenieWeighter({"I3GenieResult": weight})
+@pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1))
+@pytest.mark.parametrize("nfiles", (1, 5, 50))
+@pytest.mark.parametrize("include_volscale", (True, False))
+@pytest.mark.parametrize("flux", (0.1, 1, 10))
+def test_genie_reader_weighter(event_weight, nfiles, include_volscale, flux):
+    """Weight a synthetic genie-reader dataset and check the totals, the spectrum and the error cases."""
+    nevents = 10000
+    coszen = 0.7
+    pdgid = 12
+    injector = simweights.CircleInjector(300, 0, 1)
+    spectrum = simweights.PowerLaw(0, 1e3, 1e4)
+
+    # one record per event; the volscale column only exists in half of the parametrizations
+    fields = [("neu", np.int32), ("pzv", np.float64), ("Ev", np.float64), ("wght", np.float64)]
+    if include_volscale:
+        fields.append(("volscale", np.float64))
+
+    events = np.zeros(nevents, dtype=fields)
+    events["neu"] = pdgid
+    events["pzv"] = coszen
+    events["Ev"] = spectrum.ppf(np.linspace(0, 1, nevents))
+    events["wght"] = event_weight
+    if include_volscale:
+        events["volscale"] = 1
+
+    # the same info row repeated nfiles times
+    info_row = (
+        pdgid,
+        nevents,
+        1,
+        injector.radius,
+        np.arccos(injector.cos_zen_max),
+        np.arccos(injector.cos_zen_min),
+        spectrum.a,
+        spectrum.b,
+        spectrum.g,
+    )
+    info = np.array([info_row] * nfiles, dtype=info_dtype)
+    tables = {"I3GenieResult": events, "I3GenieInfo": info}
+
+    weights = simweights.GenieWeighter(tables).get_weights(flux)
+
+    # integrated over the whole sample
+    expected_total = flux * event_weight * injector.etendue * spectrum.integral / nfiles
+    np.testing.assert_allclose(weights.sum(), expected_total)
+
+    # differential in energy, bin by bin
+    counts, edges = np.histogram(events["Ev"], weights=weights, bins=51, range=[spectrum.a, spectrum.b])
+    expected_counts = flux * event_weight * np.ediff1d(edges) * injector.etendue / nfiles
+    np.testing.assert_allclose(counts, expected_counts, 5e-3)
+
+    with pytest.raises(RuntimeError):
+        simweights.GenieWeighter(tables, nfiles=10)
+
+    with pytest.raises(TypeError):
+        simweights.GenieWeighter({"I3CorsikaWeight": events})
+
+    with pytest.raises(KeyError):
+        simweights.GenieWeighter({"I3GenieResult": events})
+
+
+@pytest.mark.skipif("simclasses" not in globals(), reason="Not in an IceTray environment")
+@pytest.mark.parametrize("event_weight", (1e-6, 1e-3, 1))
+@pytest.mark.parametrize("volscale", (1, 2, 3))
+@pytest.mark.parametrize("flux", (0.1, 1, 10))
+def test_genie_reader_weighter_i3file(event_weight, volscale, flux):
+    nevents = 10000
+    coszen = 0.7
+    pdgid = 12
+    energy = 5e3
+    c1 = simweights.CircleInjector(300, 0, 1)
+    p1 = simweights.PowerLaw(0, 1e3, 1e4)
+    weight = simclasses.I3GenieResult()
+    weight.neu = pdgid
+    weight.pzv = coszen
+    weight.Ev = energy
+    weight.wght = event_weight
+    weight.volscale = volscale
+
+    info = simclasses.I3GenieInfo()
+    info.primary_type = dataclasses.I3Particle.ParticleType(pdgid)
+    info.n_flux_events = nevents
+    info.global_probability_scale = 1
+    info.cylinder_radius = c1.radius
+    info.min_zenith = np.arccos(c1.cos_zen_max)
+    info.max_zenith = np.arccos(c1.cos_zen_min)
+    info.min_energy = p1.a
+    info.max_energy = p1.b
+    info.power_law_index = p1.g
+
+    frame = icetray.I3Frame()
+    frame["I3GenieResult"] = weight
+    frame["I3GenieInfo"] = info
+
+    w = simweights.GenieWeighter(frame).get_weights(flux)  # the frame holds exactly one event
+    assert w == approx(flux * volscale * event_weight / c1.pdf(coszen) / p1.pdf(energy) / nevents)
 
 
 if __name__ == "__main__":
-    unittest.main()
+    sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]]))