diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 18c438e..c90496c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,7 @@ Develop
 
 Major Features and Improvements
 -------------------------------
+- added CMSShape PDF
 
 Breaking changes
 ------------------
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 579934e..03c364d 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -2,6 +2,7 @@ bumpversion>=0.5.3
 coverage>=4.5.1
 coverage>=4.5.1
 flake8>=3.5.0
+numba-stats @ git+https://github.com/HDembinski/numba-stats.git  # CMSShape not yet released (expected 1.8.0)
 pip>=9.0.1
 pre-commit
 pytest>=3.4.2
diff --git a/tests/test_pdf_cmsshape.py b/tests/test_pdf_cmsshape.py
new file mode 100644
index 0000000..f91d859
--- /dev/null
+++ b/tests/test_pdf_cmsshape.py
@@ -0,0 +1,80 @@
+"""Tests for CMSShape PDF."""
+import numpy as np
+import pytest
+import tensorflow as tf
+import zfit
+from numba_stats import cmsshape as cmsshape_numba
+
+# Important, do the imports below
+from zfit.core.testing import tester
+
+import zfit_physics as zphys
+
+# specify globals here. Do NOT add any TensorFlow but just pure python
+m_true = 90.0
+beta_true = 0.2
+gamma_true = 0.3
+
+
+def create_cmsshape(m, beta, gamma, limits):
+    obs = zfit.Space("obs1", limits)
+    cmsshape = zphys.pdf.CMSShape(m=m, beta=beta, gamma=gamma, obs=obs)
+    return cmsshape, obs
+
+
+def test_cmsshape_pdf():
+    # Test PDF here
+    cmsshape, _ = create_cmsshape(m=m_true, beta=beta_true, gamma=gamma_true, limits=(50, 130))
+    assert zfit.run(cmsshape.pdf(90.0)) == pytest.approx(
+        cmsshape_numba.pdf(90.0, beta=beta_true, gamma=gamma_true, loc=m_true).item(), rel=1e-5
+    )
+    # Compare on a real grid over the observable range. ``tf.range(50.0, 130, 10_000)``
+    # uses 10_000 as the step size and therefore only ever produced the single point 50.0.
+    x_grid = np.linspace(50.0, 130.0, 10_000)
+    np.testing.assert_allclose(
+        cmsshape.pdf(x_grid),
+        cmsshape_numba.pdf(x_grid, beta=beta_true, gamma=gamma_true, loc=m_true),
+        rtol=1e-5,
+    )
+    # The far-left edge of the range must lie below the value at ``m``.
+    assert cmsshape.pdf(50.0) <= cmsshape.pdf(90.0)
+
+    sample = cmsshape.sample(1000)
+    tf.debugging.assert_all_finite(sample.value(), "Some samples from the cmsshape PDF are NaN or infinite")
+    assert sample.n_events == 1000
+    assert all(tf.logical_and(50 <= sample.value(), sample.value() <= 130))
+
+
+def test_cmsshape_integral():
+    # Test CDF and integral here
+    cmsshape, obs = create_cmsshape(m=m_true, beta=beta_true, gamma=gamma_true, limits=(50, 130))
+    full_interval_analytic = zfit.run(cmsshape.analytic_integrate(obs, norm_range=False))
+    full_interval_numeric = zfit.run(cmsshape.numeric_integrate(obs, norm_range=False))
+    true_integral = 0.99999
+    numba_stats_full_integral = cmsshape_numba.cdf(
+        130, beta=beta_true, gamma=gamma_true, loc=m_true
+    ) - cmsshape_numba.cdf(50, beta=beta_true, gamma=gamma_true, loc=m_true)
+    assert full_interval_analytic == pytest.approx(true_integral, 1e-5)
+    assert full_interval_numeric == pytest.approx(true_integral, 1e-5)
+    assert full_interval_analytic == pytest.approx(numba_stats_full_integral, 1e-8)
+    assert full_interval_numeric == pytest.approx(numba_stats_full_integral, 1e-8)
+
+    analytic_integral = zfit.run(cmsshape.analytic_integrate(limits=(80, 100), norm_range=False))
+    numeric_integral = zfit.run(cmsshape.numeric_integrate(limits=(80, 100), norm_range=False))
+    numba_stats_integral = cmsshape_numba.cdf(100, beta=beta_true, gamma=gamma_true, loc=m_true) - cmsshape_numba.cdf(
+        80, beta=beta_true, gamma=gamma_true, loc=m_true
+    )
+    assert analytic_integral == pytest.approx(numeric_integral, 1e-8)
+    assert analytic_integral == pytest.approx(numba_stats_integral, 1e-8)
+
+
+# register the pdf here and provide sets of working parameter configurations
+def cmsshape_params_factory():
+    m = zfit.Parameter("m", m_true)
+    beta = zfit.Parameter("beta", beta_true)
+    gamma = zfit.Parameter("gamma", gamma_true)
+
+    return {"m": m, "beta": beta, "gamma": gamma}
+
+
+tester.register_pdf(pdf_class=zphys.pdf.CMSShape, params_factories=cmsshape_params_factory)
diff --git a/zfit_physics/models/pdf_cmsshape.py b/zfit_physics/models/pdf_cmsshape.py
new file mode 100644
index 0000000..196d40e
--- /dev/null
+++ b/zfit_physics/models/pdf_cmsshape.py
@@ -0,0 +1,144 @@
+from typing import Optional
+
+import tensorflow as tf
+import zfit
+from zfit import z
+from zfit.core.space import ANY_LOWER, ANY_UPPER, Space
+from zfit.util import ztyping
+
+
+@z.function(wraps="tensor")
+def cmsshape_pdf_func(x, m, beta, gamma):
+    """Calculate the CMSShape PDF.
+
+    Args:
+        x: value(s) for which the PDF will be calculated.
+        m: approximate center of the distribution.
+        beta: steepness of the error function.
+        gamma: steepness of the exponential distribution.
+
+    Returns:
+        `tf.Tensor`: The calculated PDF values.
+
+    Notes:
+        Based on code from ``spark_tnp`` and
+        `numba-stats <https://github.com/HDembinski/numba-stats>`_.
+    """
+    x = z.unstack_x(x)
+    half = 0.5
+    two = 2.0
+    t1 = tf.math.exp(-gamma * (x - m))
+    t2 = tf.math.erfc(-beta * (x - m))
+    t3 = half * gamma * tf.math.exp(-((half * gamma / beta) ** two))
+    return t1 * t2 * t3
+
+
+@z.function(wraps="tensor")
+def cmsshape_cdf_func(x, m, beta, gamma):
+    """Analytical function for the CDF of the CMSShape distribution.
+
+    Args:
+        x: value(s) for which the CDF will be calculated.
+        m: approximate center of the distribution.
+        beta: steepness of the error function.
+        gamma: steepness of the exponential distribution.
+
+    Returns:
+        `tf.Tensor`: The calculated CDF values.
+
+    Notes:
+        Based on code from ``spark_tnp`` and
+        `numba-stats <https://github.com/HDembinski/numba-stats>`_.
+    """
+    half = 0.5
+    two = 2.0
+    y = x - m
+    t1 = tf.math.erf(gamma / (two * beta) + beta * y)
+    t2 = tf.math.exp(-((gamma / (two * beta)) ** two) - gamma * y)
+    t3 = tf.math.erfc(-beta * y)
+    return half * (t1 - t2 * t3) + half
+
+
+def cmsshape_integral(limits: ztyping.SpaceType, params: dict, model) -> tf.Tensor:
+    """Calculates the analytic integral of the CMSShape PDF.
+
+    Args:
+        limits: An object with attribute limit1d.
+        params: A hashmap from which the parameters that defines the PDF will be extracted.
+        model: Will be ignored.
+
+    Returns:
+        The calculated integral.
+    """
+    lower, upper = limits.limit1d
+    m = params["m"]
+    beta = params["beta"]
+    gamma = params["gamma"]
+    lower_cdf = cmsshape_cdf_func(x=lower, m=m, beta=beta, gamma=gamma)
+    upper_cdf = cmsshape_cdf_func(x=upper, m=m, beta=beta, gamma=gamma)
+    return upper_cdf - lower_cdf
+
+
+class CMSShape(zfit.pdf.BasePDF):
+    _N_OBS = 1
+
+    def __init__(
+        self,
+        m: ztyping.ParamTypeInput,
+        beta: ztyping.ParamTypeInput,
+        gamma: ztyping.ParamTypeInput,
+        obs: ztyping.ObsTypeInput,
+        *,
+        extended: Optional[ztyping.ExtendedInputType] = None,
+        norm: Optional[ztyping.NormInputType] = None,
+        name: str = "CMSShape",
+    ):
+        """CMSShape PDF.
+
+        The distribution consists of an exponential decay suppressed at small values by the
+        complementary error function. The product is an asymmetric peak with a bell shape on the
+        left-hand side at low mass due to threshold effect and an exponential tail on the right-hand side.
+        This shape is used by the CMS experiment to model the background in the invariant mass distribution
+        of Z to ll decay candidates.
+
+        Formulas for the PDF and CDF are based on code from
+        ``spark_tnp`` and
+        `numba-stats <https://github.com/HDembinski/numba-stats>`_.
+
+        Args:
+            m: Approximate center of the distribution.
+            beta: Steepness of the error function.
+            gamma: Steepness of the exponential distribution.
+            obs: |@doc:pdf.init.obs| Observables of the
+               model. This will be used as the default space of the PDF and,
+               if not given explicitly, as the normalization range.
+
+               The default space is used for example in the sample method: if no
+               sampling limits are given, the default space is used.
+
+               The observables are not equal to the domain as it does not restrict or
+               truncate the model outside this range. |@docend:pdf.init.obs|
+            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
+               If this is parameter-like, it will be used as the yield,
+               the expected number of events, and the PDF will be extended.
+               An extended PDF has additional functionality, such as the
+               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
+            norm: |@doc:pdf.init.norm| Normalization of the PDF.
+               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
+            name: |@doc:pdf.init.name| Human-readable name
+               or label of
+               the PDF for better identification.
+               Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
+        """
+        params = {"m": m, "beta": beta, "gamma": gamma}
+        super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm)
+
+    def _unnormalized_pdf(self, x: tf.Tensor) -> tf.Tensor:
+        m = self.params["m"]
+        beta = self.params["beta"]
+        gamma = self.params["gamma"]
+        return cmsshape_pdf_func(x=x, m=m, beta=beta, gamma=gamma)
+
+
+cmsshape_integral_limits = Space(axes=(0,), limits=(((ANY_LOWER,),), ((ANY_UPPER,),)))
+CMSShape.register_analytic_integral(func=cmsshape_integral, limits=cmsshape_integral_limits)
diff --git a/zfit_physics/pdf.py b/zfit_physics/pdf.py
index 875e0bd..2e7112b 100644
--- a/zfit_physics/pdf.py
+++ b/zfit_physics/pdf.py
@@ -1,4 +1,5 @@
 from .models.pdf_argus import Argus
+from .models.pdf_cmsshape import CMSShape
 from .models.pdf_relbw import RelativisticBreitWigner
 
-__all__ = ["Argus", "RelativisticBreitWigner"]
+__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape"]