diff --git a/.gitignore b/.gitignore
index 1d59eb1..4fd5169 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,3 +103,5 @@ venv.bak/
# mypy
.mypy_cache/
/.idea/sonarlint/*
+/tests/tfpwa/data/
+/src/zfit_physics/_version.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e93f576..b9159a2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,6 +26,16 @@ repos:
# - id: docformatter
# args: [ -r, --in-place, --wrap-descriptions, '120', --wrap-summaries, '120', -- ]
+ - repo: local
+ hooks:
+ - id: doc-arg-replacer
+ name: docarg
+ entry: utils/api/replace_argdocs.py
+ language: python
+ always_run: true
+ additional_dependencies: [ pyyaml ]
+
+
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 4248f7b..5be70da 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,7 @@ Develop
Major Features and Improvements
-------------------------------
+- `TF-PWA <https://github.com/jiangyi15/tf-pwa>`_ support for loss functions: a zfit minimizer can directly minimize the loss function (``FCN``) of a TF-PWA model.
Breaking changes
------------------
diff --git a/docs/api/static/zfit_physics.pdf.rst b/docs/api/static/zfit_physics.pdf.rst
index 8c4d3c1..8bd8e34 100644
--- a/docs/api/static/zfit_physics.pdf.rst
+++ b/docs/api/static/zfit_physics.pdf.rst
@@ -1,5 +1,5 @@
-pdf
-===
+PDFs
+=======================
.. automodule:: zfit_physics.pdf
:members:
diff --git a/docs/api/static/zfit_physics.tfpwa.rst b/docs/api/static/zfit_physics.tfpwa.rst
new file mode 100644
index 0000000..ec866eb
--- /dev/null
+++ b/docs/api/static/zfit_physics.tfpwa.rst
@@ -0,0 +1,44 @@
+TF-PWA
+=======================
+
+TF-PWA is a generic software package for Partial Wave Analysis (PWA). It can be connected with zfit,
+currently by providing a loss function that can be minimized directly by a zfit minimizer.
+
+Import the module with:
+
+.. code-block:: python
+
+ import zfit_physics.tfpwa as ztfpwa
+
+This enables a :py:class:`~tf_pwa.model.FCN` to be used as a loss function in zfit minimizers as
+
+.. code-block:: python
+
+ minimizer.minimize(loss=fcn)
+
+More explicitly, the loss function can be created with
+
+.. code-block:: python
+
+ nll = ztfpwa.loss.nll_from_fcn(fcn)
+
+which optionally takes already created :py:class:`~zfit.core.interfaces.ZfitParameter` objects as arguments.
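+
+For example, a minimal sketch mirroring the tests of this package (assuming ``config`` is an
+already created ``tf_pwa.config_loader.ConfigLoader``):
+
+.. code-block:: python
+
+    import zfit
+
+    fcn = config.get_fcn()
+    nll = ztfpwa.loss.nll_from_fcn(fcn)
+
+    minimizer = zfit.minimize.Minuit(gradient="zfit")
+    result = minimizer.minimize(nll)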
+
+
+Variables
+++++++++++++
+
+
+.. automodule:: zfit_physics.tfpwa.variables
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+Loss
+++++++++++++
+
+.. automodule:: zfit_physics.tfpwa.loss
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/conf.py b/docs/conf.py
index 94f2f3c..96397b2 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -22,6 +22,8 @@
import sys
from pathlib import Path
+import yaml
+
sys.path.insert(0, str(Path("..").resolve()))
import zfit_physics
@@ -68,7 +70,7 @@
"sphinx_copybutton",
"sphinxcontrib.youtube",
"sphinx_panels",
- "seed_intersphinx_mapping",
+ # "seed_intersphinx_mapping",
"myst_nb",
"sphinx_togglebutton",
]
@@ -115,6 +117,27 @@
}
autodoc_inherit_docstrings = False
+
+# Replace the internal doc markers with zero-width spaces. Maybe move to preprocessing?
+project_dir = Path(__file__).parents[1]
+rst_epilog = """
+.. |wzw| unicode:: U+200B
+ :trim:
+
+"""
+with Path(project_dir / "utils/api/argdocs.yaml").open() as replfile:
+ replacements = yaml.load(replfile, Loader=yaml.Loader)
+for replacement_key in replacements:
+ rst_epilog += f"""
+.. |@doc:{replacement_key}| replace:: |wzw|
+
+.. |@docend:{replacement_key}| replace:: |wzw|
+"""
+
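+# For example, the key "pdf.init.name" from argdocs.yaml produces the substitutions
+# ".. |@doc:pdf.init.name| replace:: |wzw|" and ".. |@docend:pdf.init.name| replace:: |wzw|",
+# so the markers around the argument docs render as zero-width spaces in the built docs.
+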
# -- autosummary settings ---------------------------------------------
autosummary_generate = True
diff --git a/docs/index.rst b/docs/index.rst
index 7dd522f..f68d2bf 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -23,3 +23,11 @@ PDF documentation
:maxdepth: 2
api/static/zfit_physics.pdf
+
+Extensions
+----------
+
+.. toctree::
+ :maxdepth: 1
+
+ api/static/zfit_physics.tfpwa.rst
diff --git a/pyproject.toml b/pyproject.toml
index f247509..e8de634 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,18 @@ dependencies = ["zfit>=0.20"]
dynamic = ["version"]
[project.optional-dependencies]
+
+tfpwa = ["tfpwa@git+https://github.com/jiangyi15/tf-pwa"]
+
+all = ["zfit-physics[tfpwa]"]
+test = [
+ "pytest",
+ "pytest-cov",
+ "pytest-rerunfailures",
+ "pytest-xdist",
+ "zfit-physics[all]",
+ "contextlib_chdir", # backport of chdir from Python 3.11
+]
dev = [
"bumpversion>=0.5.3",
"coverage>=4.5.1",
@@ -46,11 +58,7 @@ dev = [
"pip>=9.0.1",
"pre-commit",
"pydata-sphinx-theme>=0.9", # new dark theme configuration
- "pytest>=3.4.2",
- "pytest-cov",
- "pytest-rerunfailures>=6",
- "pytest-runner>=2.11.1",
- "pytest-xdist",
+ "pyyaml",
"seed_intersphinx_mapping",
"setupext-janitor",
"Sphinx>=3.5.4",
@@ -65,6 +73,7 @@ dev = [
"twine>=1.10.0",
"watchdog>=0.8.3",
"wheel>=0.29.0",
+ "zfit-physics[test]",
]
[project.urls]
@@ -74,16 +83,11 @@ Repository = "https://github.com/zfit/zfit-physics"
Discussions = "https://github.com/zfit/zfit-physics/discussions"
Changelog = "https://github.com/zfit/zfit-physics/blob/main/CHANGELOG.rst"
-
-
-
-
-
-
-
[tool.hatch]
version.source = "vcs"
build.hooks.vcs.version-file = "src/zfit_physics/_version.py"
+metadata.allow-direct-references = true
+
[tool.pytest.ini_options]
minversion = "6.0"
diff --git a/src/zfit_physics/__init__.py b/src/zfit_physics/__init__.py
index 6220aae..5da2608 100644
--- a/src/zfit_physics/__init__.py
+++ b/src/zfit_physics/__init__.py
@@ -6,7 +6,7 @@
__license__ = "BSD 3-Clause"
__copyright__ = "Copyright 2019, zfit"
-__status__ = "Pre-alpha"
+__status__ = "Beta"
__author__ = "zfit"
__maintainer__ = "zfit"
@@ -18,6 +18,6 @@
# TODO(release): add more, Anton etc
]
-__all__ = ["pdf"]
+__all__ = ["pdf", "unstable"]
from . import pdf, unstable
diff --git a/src/zfit_physics/models/pdf_argus.py b/src/zfit_physics/models/pdf_argus.py
index f08516a..d2b00cb 100644
--- a/src/zfit_physics/models/pdf_argus.py
+++ b/src/zfit_physics/models/pdf_argus.py
@@ -81,6 +81,16 @@ def __init__(
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
m0: Maximal energetically allowed mass, cutoff
@@ -93,10 +103,13 @@ def __init__(
``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
- name: |@doc:pdf.init.name| Human-readable name
+ name: |@doc:pdf.init.name| Name of the PDF.
+ May have implications for the serialization and deserialization of the PDF.
+ For a human-readable name, use the label. |@docend:pdf.init.name|
+ label: |@doc:pdf.init.label| Human-readable name
or label of
- the PDF for better identification. |@docend:pdf.init.name|
- label: |@doc:pdf.init.label| Label of the PDF, if None is given, it will be the name. |@docend:pdf.init.label|
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
Returns:
`tf.Tensor`: the values matching the (broadcasted) shapes of the input
diff --git a/src/zfit_physics/models/pdf_cmsshape.py b/src/zfit_physics/models/pdf_cmsshape.py
index fc12718..7df2341 100644
--- a/src/zfit_physics/models/pdf_cmsshape.py
+++ b/src/zfit_physics/models/pdf_cmsshape.py
@@ -117,6 +117,16 @@ def __init__(
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
@@ -126,11 +136,13 @@ def __init__(
``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
- name: |@doc:pdf.init.name| Human-readable name
+ name: |@doc:pdf.init.name| Name of the PDF.
+ May have implications for the serialization and deserialization of the PDF.
+ For a human-readable name, use the label. |@docend:pdf.init.name|
+ label: |@doc:pdf.init.label| Human-readable name
or label of
- the PDF for better identification.
- Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
- label: |@doc:pdf.init.label| Label of the PDF, if None is given, it will be the name. |@docend:pdf.init.label|
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
"""
params = {"m": m, "beta": beta, "gamma": gamma}
super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm, label=label)
diff --git a/src/zfit_physics/models/pdf_conv.py b/src/zfit_physics/models/pdf_conv.py
index 2e239d3..39612f9 100644
--- a/src/zfit_physics/models/pdf_conv.py
+++ b/src/zfit_physics/models/pdf_conv.py
@@ -40,6 +40,16 @@ def __init__(
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
@@ -49,10 +59,13 @@ def __init__(
``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
- name: |@doc:pdf.init.name| Human-readable name
+ name: |@doc:pdf.init.name| Name of the PDF.
+ May have implications for the serialization and deserialization of the PDF.
+ For a human-readable name, use the label. |@docend:pdf.init.name|
+ label: |@doc:pdf.init.label| Human-readable name
or label of
- the PDF for better identification. |@docend:pdf.init.name|
- label: |@doc:pdf.init.label| Label of the PDF, if None is given, it will be the name. |@docend:pdf.init.label|
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
"""
super().__init__(obs=obs, pdfs=[func, kernel], params={}, name=name, extended=extended, norm=norm, label=label)
limits = self._check_input_limits(limits=limits)
diff --git a/src/zfit_physics/models/pdf_cruijff.py b/src/zfit_physics/models/pdf_cruijff.py
index f3491f6..d1f19a1 100644
--- a/src/zfit_physics/models/pdf_cruijff.py
+++ b/src/zfit_physics/models/pdf_cruijff.py
@@ -73,26 +73,38 @@ def __init__(
sigmar: Right width parameter.
alphar: Right tail acceleration parameter.
obs: |@doc:pdf.init.obs| Observables of the
- model. This will be used as the default space of the PDF and,
- if not given explicitly, as the normalization range.
+ model. This will be used as the default space of the PDF and,
+ if not given explicitly, as the normalization range.
- The default space is used for example in the sample method: if no
- sampling limits are given, the default space is used.
+ The default space is used for example in the sample method: if no
+ sampling limits are given, the default space is used.
- The observables are not equal to the domain as it does not restrict or
- truncate the model outside this range. |@docend:pdf.init.obs|
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
+ The observables are not equal to the domain as it does not restrict or
+ truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
- If this is parameter-like, it will be used as the yield,
- the expected number of events, and the PDF will be extended.
- An extended PDF has additional functionality, such as the
- ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
+ If this is parameter-like, it will be used as the yield,
+ the expected number of events, and the PDF will be extended.
+ An extended PDF has additional functionality, such as the
+ ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
- By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
- name: |@doc:pdf.init.name| Human-readable name
- or label of
- the PDF for better identification.
- Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
- label: |@doc:pdf.init.label| A human readable label to identify the PDF. |@docend:pdf.init.label|
+ By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
+ name: |@doc:pdf.init.name| Name of the PDF.
+ May have implications for the serialization and deserialization of the PDF.
+ For a human-readable name, use the label. |@docend:pdf.init.name|
+ label: |@doc:pdf.init.label| Human-readable name
+ or label of
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
"""
params = {"mu": mu, "sigmal": sigmal, "alphal": alphal, "sigmar": sigmar, "alphar": alphar}
super().__init__(obs=obs, params=params, extended=extended, norm=norm, name=name, label=label)
diff --git a/src/zfit_physics/models/pdf_erfexp.py b/src/zfit_physics/models/pdf_erfexp.py
index 1abce22..49e0265 100644
--- a/src/zfit_physics/models/pdf_erfexp.py
+++ b/src/zfit_physics/models/pdf_erfexp.py
@@ -74,6 +74,16 @@ def __init__(
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
@@ -83,11 +93,13 @@ def __init__(
``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
- name: |@doc:pdf.init.name| Human-readable name
+ name: |@doc:pdf.init.name| Name of the PDF.
+ May have implications for the serialization and deserialization of the PDF.
+ For a human-readable name, use the label. |@docend:pdf.init.name|
+ label: |@doc:pdf.init.label| Human-readable name
or label of
- the PDF for better identification.
- Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
- label: |@doc:pdf.init.label| A human readable label to identify the PDF. |@docend:pdf.init.label|
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
"""
params = {"mu": mu, "beta": beta, "gamma": gamma, "n": n}
super().__init__(obs=obs, params=params, extended=extended, norm=norm, name=name, label=label)
diff --git a/src/zfit_physics/models/pdf_kde.py b/src/zfit_physics/models/pdf_kde.py
index adfd122..ebb26eb 100644
--- a/src/zfit_physics/models/pdf_kde.py
+++ b/src/zfit_physics/models/pdf_kde.py
@@ -36,6 +36,16 @@ def __init__(
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
@@ -45,10 +55,13 @@ def __init__(
``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
- name: |@doc:pdf.init.name| Human-readable name
+ name: |@doc:pdf.init.name| Name of the PDF.
+ May have implications for the serialization and deserialization of the PDF.
+ For a human-readable name, use the label. |@docend:pdf.init.name|
+ label: |@doc:pdf.init.label| Human-readable name
or label of
- the PDF for better identification. |@docend:pdf.init.name|
- label: |@doc:pdf.init.label| Label of the PDF, if None is given, it will be the name. |@docend:pdf.init.label|
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
"""
dtype = zfit.settings.ztypes.float
if isinstance(data, zfit.core.interfaces.ZfitData):
diff --git a/src/zfit_physics/models/pdf_novosibirsk.py b/src/zfit_physics/models/pdf_novosibirsk.py
index 27cff22..4059b99 100644
--- a/src/zfit_physics/models/pdf_novosibirsk.py
+++ b/src/zfit_physics/models/pdf_novosibirsk.py
@@ -149,6 +149,16 @@ def __init__(
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
@@ -162,9 +172,9 @@ def __init__(
Maybe has implications on the serialization and deserialization of the PDF.
For a human-readable name, use the label. |@docend:pdf.init.name|
label: |@doc:pdf.init.label| Human-readable name
- or label of
- the PDF for a better description, to be used with plots etc.
- Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
+ or label of
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
"""
params = {"mu": mu, "sigma": sigma, "lambd": lambd}
super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm, label=label)
diff --git a/src/zfit_physics/models/pdf_relbw.py b/src/zfit_physics/models/pdf_relbw.py
index 1e5aabe..2b615aa 100644
--- a/src/zfit_physics/models/pdf_relbw.py
+++ b/src/zfit_physics/models/pdf_relbw.py
@@ -58,6 +58,16 @@ def __init__(
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
@@ -71,9 +81,9 @@ def __init__(
Maybe has implications on the serialization and deserialization of the PDF.
For a human-readable name, use the label. |@docend:pdf.init.name|
label: |@doc:pdf.init.label| Human-readable name
- or label of
- the PDF for a better description, to be used with plots etc.
- Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
+ or label of
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
"""
params = {"m": m, "gamma": gamma}
super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm, label=label)
diff --git a/src/zfit_physics/models/pdf_tsallis.py b/src/zfit_physics/models/pdf_tsallis.py
index cb08ce0..eca29db 100644
--- a/src/zfit_physics/models/pdf_tsallis.py
+++ b/src/zfit_physics/models/pdf_tsallis.py
@@ -125,6 +125,16 @@ def __init__(
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
@@ -134,14 +144,13 @@ def __init__(
``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
- name: |@doc:pdf.init.name| Human-readable name
- or label of
- the PDF for better identification.
- Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
+ name: |@doc:pdf.init.name| Name of the PDF.
+ May have implications for the serialization and deserialization of the PDF.
+ For a human-readable name, use the label. |@docend:pdf.init.name|
label: |@doc:pdf.init.label| Human-readable name
- or label of
- the PDF for a better description, to be used with plots etc.
- Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
+ or label of
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification. |@docend:pdf.init.label|
"""
if run.executing_eagerly():
if n <= 2:
diff --git a/src/zfit_physics/tfpwa/__init__.py b/src/zfit_physics/tfpwa/__init__.py
new file mode 100644
index 0000000..c0be90e
--- /dev/null
+++ b/src/zfit_physics/tfpwa/__init__.py
@@ -0,0 +1,3 @@
+from . import loss, variables
+
+__all__ = ["loss", "variables"]
diff --git a/src/zfit_physics/tfpwa/loss.py b/src/zfit_physics/tfpwa/loss.py
new file mode 100644
index 0000000..5f64eac
--- /dev/null
+++ b/src/zfit_physics/tfpwa/loss.py
@@ -0,0 +1,106 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Optional, Union
+
+if TYPE_CHECKING:
+ import tf_pwa
+
+import zfit
+import zfit.z.numpy as znp
+from zfit.core.interfaces import ZfitParameter
+from zfit.util.container import convert_to_container
+
+from .variables import params_from_fcn
+
+ParamType = Optional[Union[ZfitParameter, Iterable[ZfitParameter]]]
+
+
+def nll_from_fcn(fcn: tf_pwa.model.FCN, *, params: ParamType = None) -> zfit.loss.SimpleLoss:
+ """Create a zfit loss from a tf_pwa FCN.
+
+ Args:
+ fcn: A tf_pwa.FCN
+ params: list of zfit.Parameter, optional
+ Parameters to use in the loss. If None, all trainable parameters in the FCN are used.
+
+ Returns:
+ zfit.loss.SimpleLoss
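+
+    Example:
+        A minimal sketch; assumes ``config`` is an already created
+        ``tf_pwa.config_loader.ConfigLoader`` (as in the tests of this package):
+
+        >>> fcn = config.get_fcn()
+        >>> nll = nll_from_fcn(fcn)
+        >>> minimizer = zfit.minimize.Minuit(gradient="zfit")
+        >>> result = minimizer.minimize(nll)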
+ """
+ params = params_from_fcn(fcn) if params is None else convert_to_container(params, container=list)
+ paramnames = tuple(p.name for p in params)
+
+ # something is off here: for the value, we need to pass the parameters as a dict
+ # but for the gradient/hesse, we need to pass them as a list
+ # TODO: activate if https://github.com/jiangyi15/tf-pwa/pull/153 is merged
+ # @z.function(wraps="loss")
+ def eval_func(params):
+ paramdict = make_paramdict(params)
+ return fcn(paramdict)
+
+ # TODO: activate if https://github.com/jiangyi15/tf-pwa/pull/153 is merged
+ # @z.function(wraps="loss")
+ def eval_grad(params):
+ return fcn.nll_grad(params)[1]
+
+ def make_paramdict(params, *, paramnames=paramnames):
+ return {p: znp.array(v.value()) for p, v in zip(paramnames, params)}
+
+ return zfit.loss.SimpleLoss(
+ func=eval_func,
+ params=params,
+ errordef=0.5,
+ gradient=eval_grad,
+ hessian=lambda x: fcn.nll_grad_hessian(x)[2],
+ jit=False,
+ )
+
+
+def _nll_from_fcn_or_false(fcn: tf_pwa.model.FCN, *, params: ParamType = None) -> zfit.loss.SimpleLoss | bool:
+ try:
+ from tf_pwa.model import FCN
+ except ImportError:
+ return False
+ else:
+ if isinstance(fcn, FCN):
+ return nll_from_fcn(fcn, params=params)
+ return False
+
+
+zfit.loss.SimpleLoss.register_convertable_loss(_nll_from_fcn_or_false, priority=50)
+# Maybe actually add a custom loss?
+# class TFPWALoss(zfit.loss.BaseLoss):
+# def __init__(self, loss, params=None):
+# if params is None:
+# params = [zfit.Parameter(n, v) for n, v in amp.get_params().items() if n in fcn.vm.trainable_vars]
+# self._lossparams = params
+# super().__init__(model=[], data=[], options={"subtr_const": False}, jit=False)
+# self._errordef = 0.5
+# self._tfpwa_loss = loss
+#
+# def _value(self, model, data, fit_range, constraints, log_offset):
+# return self._tfpwa_loss(self._lossparams)
+#
+# def _value_gradient(self, params, numgrad, full=None):
+# return self._tfpwa_loss.get_nll_grad(params)
+#
+# def _value_gradient_hessian(self, params, hessian, numerical=False, full: bool | None = None):
+# return self._tfpwa_loss.get_nll_grad_hessian(params)
+#
+# below is a small hack as zfit is reworking its loss currently
+# def _get_params(
+# self,
+# floating: bool | None = True,
+# is_yield: bool | None = None,
+# extract_independent: bool | None = True,
+# ):
+# params = super()._get_params(floating, is_yield, extract_independent)
+# from zfit.core.baseobject import extract_filter_params
+# own_params = extract_filter_params(self._lossparams, floating=floating, extract_independent=extract_independent)
+# return params.union(own_params)
+#
+# def create_new(self):
+# raise RuntimeError("Not needed, todo")
+#
+# def _loss_func(self,):
+# raise RuntimeError("Not needed, needs new release")
diff --git a/src/zfit_physics/tfpwa/variables.py b/src/zfit_physics/tfpwa/variables.py
new file mode 100644
index 0000000..b5c3c21
--- /dev/null
+++ b/src/zfit_physics/tfpwa/variables.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ import tf_pwa
+
+import zfit
+
+
+def params_from_fcn(fcn: tf_pwa.model.FCN) -> list[zfit.Parameter]:
+ """Get zfit.Parameter objects from a tf_pwa.FCN.
+
+ Args:
+ fcn: A tf_pwa.FCN
+
+ Returns:
+ list of zfit.Parameter
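+
+    Example:
+        A sketch, assuming ``fcn`` is a ``tf_pwa.model.FCN`` from an already
+        created ``ConfigLoader`` (as in the tests):
+
+        >>> params = params_from_fcn(fcn)
+        >>> floating = [p for p in params if p.floating]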
+ """
+ return [zfit.Parameter(n, v, floating=n in fcn.vm.trainable_vars) for n, v in fcn.get_params().items()]
diff --git a/tests/tfpwa/config.yml b/tests/tfpwa/config.yml
new file mode 100644
index 0000000..9ff334e
--- /dev/null
+++ b/tests/tfpwa/config.yml
@@ -0,0 +1,34 @@
+data:
+ dat_order: [B, C, D]
+ data: ["data/data.dat"]
+ phsp: ["data/PHSP.dat"]
+
+decay:
+ A:
+ - [R_BC, D]
+ - [R_BD, C]
+ - [R_CD, B]
+ R_BC: [B, C]
+ R_BD: [B, D]
+ R_CD: [C, D]
+
+particle:
+ $top:
+ A: { J: 1, P: -1, m0: 4.6, spins: [-1, 1] }
+ $finals:
+ B: { J: 1, P: -1, m0: 2.00698 }
+ C: { J: 1, P: -1, m0: 2.01028 }
+ D: { J: 0, P: -1, m0: 0.13957 }
+ R_BC: { J: 1, P: 1, m0: 4.16, g0: 0.1 }
+ R_BD: { J: 1, P: 1, m0: 2.43, g0: 0.3 }
+ R_CD: { J: 1, P: 1, m0: 2.42, g0: 0.03 }
+
+constrains:
+ particle: null
+ decay: { fix_chain_idx: 0, fix_chain_val: 1 }
+
+plot:
+ mass:
+ R_BC: { display: "$M_{BC}$" }
+ R_BD: { display: "$M_{BD}$" }
+ R_CD: { display: "$M_{CD}$" }
diff --git a/tests/tfpwa/gen_params.json b/tests/tfpwa/gen_params.json
new file mode 100644
index 0000000..284bf67
--- /dev/null
+++ b/tests/tfpwa/gen_params.json
@@ -0,0 +1,40 @@
+{
+ "A->R_BC.DR_BC->B.C_total_0r": 0.4516137715445613,
+ "A->R_BC.DR_BC->B.C_total_0i": -3.443873166811006,
+ "A->R_BC.D_g_ls_0r": 1.0,
+ "A->R_BC.D_g_ls_0i": 0.0,
+ "A->R_BC.D_g_ls_1r": 3.524353145807241,
+ "A->R_BC.D_g_ls_1i": -1.5785358292796043,
+ "R_BC->B.C_g_ls_0r": 1.0,
+ "R_BC->B.C_g_ls_0i": 0.0,
+ "R_BC->B.C_g_ls_1r": 3.657117111335246,
+ "R_BC->B.C_g_ls_1i": -0.7740998165995177,
+ "R_BC->B.C_g_ls_2r": -7.189904912257068,
+ "R_BC->B.C_g_ls_2i": -0.9725007056909462,
+
+ "A->R_BD.CR_BD->B.D_total_0r": 0.37379797096557865,
+ "A->R_BD.CR_BD->B.D_total_0i": 2.1304359777039887,
+ "A->R_BD.C_g_ls_0r": 1.0,
+ "A->R_BD.C_g_ls_0i": 0.0,
+ "A->R_BD.C_g_ls_1r": 7.907873436450139,
+ "A->R_BD.C_g_ls_1i": 1.100472778045793,
+ "A->R_BD.C_g_ls_2r": 4.012710641580947,
+ "A->R_BD.C_g_ls_2i": -2.000376113177179,
+ "R_BD->B.D_g_ls_0r": 1.0,
+ "R_BD->B.D_g_ls_0i": 0.0,
+ "R_BD->B.D_g_ls_1r": -3.1444629758429206,
+ "R_BD->B.D_g_ls_1i": -0.8168606658739426,
+
+ "A->R_CD.BR_CD->C.D_total_0r": 0.14288893206923325,
+ "A->R_CD.BR_CD->C.D_total_0i": -4.643516581555534,
+ "A->R_CD.B_g_ls_0r": 1.0,
+ "A->R_CD.B_g_ls_0i": 0.0,
+ "A->R_CD.B_g_ls_1r": -3.841967271985308,
+ "A->R_CD.B_g_ls_1i": -2.5575694295501816,
+ "A->R_CD.B_g_ls_2r": 2.3315982507642388,
+ "A->R_CD.B_g_ls_2i": -3.2170866088659476,
+ "R_CD->C.D_g_ls_0r": 1.0,
+ "R_CD->C.D_g_ls_0i": 0.0,
+ "R_CD->C.D_g_ls_1r": -2.7951960228603765,
+ "R_CD->C.D_g_ls_1i": -7.300347506991695
+}
diff --git a/tests/tfpwa/test_basic_example_tfpwa.py b/tests/tfpwa/test_basic_example_tfpwa.py
new file mode 100644
index 0000000..62922c3
--- /dev/null
+++ b/tests/tfpwa/test_basic_example_tfpwa.py
@@ -0,0 +1,93 @@
+import numpy as np
+
+try:
+ from contextlib import chdir
+except ImportError:
+ from contextlib_chdir import chdir
+from pathlib import Path
+
+import pytest
+import zfit
+from tf_pwa.config_loader import ConfigLoader
+
+import zfit_physics.tfpwa as ztfpwa
+
+this_dir = Path(__file__).parent
+
+
+def generate_phsp_mc():
+ """Take three-body decay A->BCD for example, we generate a PhaseSpace MC sample and a toy data sample."""
+
+ datpath = this_dir / "data"
+ datpath.mkdir(exist_ok=True)
+
+ print(f"Generate phase space MC: {datpath / 'PHSP.dat'}")
+ generate_phspMC(Nmc=2000, mc_file=datpath / "PHSP.dat")
+ print(f"Generate toy data: {datpath / 'data.dat'}")
+ generate_toy_from_phspMC(Ndata=120, data_file=datpath / "data.dat")
+ print("Done!")
+
+
+def generate_phspMC(Nmc, mc_file):
+ # We use ConfigLoader to read the information in the configuration file
+ configpath = str(mc_file.parent.parent / "config.yml")
+ config = ConfigLoader(configpath)
+ # Set the parameters in the amplitude model
+ config.set_params(str(mc_file.parent.parent / "gen_params.json"))
+
+ phsp = config.generate_phsp_p(Nmc)
+
+ config.data.savetxt(str(mc_file), phsp)
+
+
+def generate_toy_from_phspMC(Ndata, data_file):
+ # We use ConfigLoader to read the information in the configuration file
+ configpath = str(data_file.parent.parent / "config.yml")
+ config = ConfigLoader(configpath)
+ # Set the parameters in the amplitude model
+ config.set_params(str(data_file.parent.parent / "gen_params.json"))
+
+ data = config.generate_toy_p(Ndata)
+
+ config.data.savetxt(str(data_file), data)
+ return data
+
+
+def test_example1_tfpwa():
+ generate_phsp_mc()
+ config = ConfigLoader(str(this_dir / "config.yml"))
+ # Set initial parameters. If not set, random initial parameters will be used
+ config.set_params(str(this_dir / "gen_params.json"))
+
+ with chdir(this_dir):
+ fcn = config.get_fcn()
+ nll = ztfpwa.loss.nll_from_fcn(fcn)
+
+ initial_val = config.get_fcn()(config.get_params())
+ fit_result = config.fit(method="BFGS")
+
+ kwargs = dict(gradient='zfit', tol=0.01)
+ assert pytest.approx(nll.value(), 0.001) == initial_val
+ v, g, h = fcn.nll_grad_hessian()
+ vz, gz, hz = nll.value_gradient_hessian()
+ hz1 = nll.hessian()
+ gz1 = nll.gradient()
+ assert pytest.approx(v, 0.001) == vz
+ np.testing.assert_allclose(g, gz, atol=0.001)
+ np.testing.assert_allclose(h, hz, atol=0.001)
+ np.testing.assert_allclose(h, hz1, atol=0.001)
+ np.testing.assert_allclose(g, gz1, atol=0.001)
+
+ minimizer = zfit.minimize.Minuit(verbosity=7, **kwargs)
+ # minimizer = zfit.minimize.ScipyBFGS(verbosity=7, **kwargs)  # performs best
+ # minimizer = zfit.minimize.NLoptMMAV1(verbosity=7, **kwargs)
+ # minimizer = zfit.minimize.ScipyLBFGSBV1(verbosity=7, **kwargs)
+ # minimizer = zfit.minimize.NLoptLBFGSV1(verbosity=7, **kwargs)
+ # minimizer = zfit.minimize.IpyoptV1(verbosity=7, **kwargs)
+ print(f"Minimizer {minimizer} start with {kwargs}")
+ result = minimizer.minimize(fcn)
+ print(f"Finished minimization with config:{kwargs}")
+ print(result)
+
+ assert result.converged
+ assert pytest.approx(result.fmin, 0.05) == fit_result.min_nll
diff --git a/utils/api/argdocs.yaml b/utils/api/argdocs.yaml
new file mode 100644
index 0000000..d5c64f2
--- /dev/null
+++ b/utils/api/argdocs.yaml
@@ -0,0 +1,812 @@
+space.init.obs: |1+
+ Observable of the space.
+ Serves as the "variable".
+
+space.init.lowerupper: |1+
+ Lower and upper limits of the space, respectively.
+ Each of them should be a scalar-like object.
+
+space.init.limits: |1+
+ A tuple-like object of the limits of the space.
+ These are the lower and upper limits.
+
+space.init.binning: |1+
+ Binning of the space.
+ Currently, only regular and variable binning *with a name* is supported.
+ If an integer or a list of integers is given with
+ lengths equal to the number of observables,
+ it is interpreted as the number of bins and
+ a regular binning is automatically created using the limits as the
+ start and end points.
+
+space.init.name: |1+
+ Name of the space.
+ May have implications for the serialization and deserialization of the space.
+ For a human-readable name, use the label.
+
+space.init.label: |1+
+ Human-readable name
+ or label for a better description of the space, to be used with plots etc.
+ Has no programmatic, functional purpose as identification.
+
+pdf.init.name: |1+
+ Name of the PDF.
+ May have implications for the serialization and deserialization of the PDF.
+ For a human-readable name, use the label.
+
+pdf.init.label: |1+
+ Human-readable name
+ or label of
+ the PDF for a better description, to be used with plots etc.
+ Has no programmatic, functional purpose as identification.
+
+
+model.args.params: |1+
+ Mapping of the parameter names to the actual
+ values. The parameter names refer to the names of the parameters,
+ typically :py:class:`~zfit.Parameter`, that
+ the model was _initialized_ with, not the name of the models
+ parametrization.
+
+binneddata.param.space: |1+
+ Binned space of the data.
+ The space is used to define the binning and the limits of the data.
+
+binneddata.param.values: |1+
+ Corresponds to the counts of the histogram.
+ Follows the definition of the
+ `Unified Histogram Interface (UHI) <https://uhi.readthedocs.io/en/latest/>`_.
+
+binneddata.param.variances: |1+
+ Corresponds to the uncertainties of the histogram.
+ If ``True``, the uncertainties are created assuming that ``values``
+ have been drawn from a Poisson distribution. Follows the definition of the
+ `Unified Histogram Interface (UHI) <https://uhi.readthedocs.io/en/latest/>`_.
+
+data.param.obs: |1+
+ Space of the data.
+ The space is used to define the observables and the limits of the data.
+
+data.init.obs: |1+
+ Space of the data.
+ The space is used to define the observables and the limits of the data.
+ If the :py:class:`~zfit.Space` has limits, these will be used to cut the
+ data. If the data is already cut, use ``guarantee_limits`` for a possible
+ performance improvement.
+
+data.init.weights: |1+
+ Weights of the data.
+ Has to be 1-D and match the shape of the data (nevents).
+ Note that a weighted dataset may not be supported by all methods
+ or may need additional approximations to correct for the weights, taking
+ more time.
+
+data.init.name: |1+
+ Name of the data.
+ This may be used for future identification, with possible
+ implications for the serialization and deserialization of the data.
+ The name should therefore be "machine-readable" and not contain
+ special characters.
+ (currently not used for a special purpose)
+ For a human-readable name or description, use the label.
+
+data.init.label: |1+
+ Human-readable name
+ or label of the data for a better description, to be used with plots etc.
+ Can contain arbitrary characters.
+ Has no programmatic, functional purpose as identification.
+
+data.init.guarantee_limits: |1+
+ Guarantee that the data is within the limits.
+ If ``True``, the data will not be checked and _is assumed_ to be within the limits,
+ possibly because it was already cut before. This can lead to a performance
+ improvement as the data does not have to be checked.
+
+data.init.use_hash: |1+
+ If true, store a hash for caching.
+ If a PDF can cache values, this option needs to be enabled for the PDF
+ to be able to cache values.
+
+data.init.returns: |1+
+ ``zfit.Data`` or ``zfit.BinnedData``:
+ A ``Data`` object containing the unbinned data
+ or a ``BinnedData`` if the obs is binned.
+
+binnedpdf.pdf.x: |1+
+ Values to evaluate the PDF at.
+ If this is a ``ZfitBinnedData``-like object, a histogram of *densities*
+ will be returned. If x is a ``ZfitUnbinnedData``-like object, the densities will be
+ evaluated at the points of ``x``.
+
+binnedpdf.out.problike: |1+
+ If the input was unbinned, it returns an array
+ of shape (nevents,). If the input was binned, the dimensions and ordering of
+ the axes correspond to the input axes.
+
+pdf.init.obs: |1+
+ Observables of the
+ model. This will be used as the default space of the PDF and,
+ if not given explicitly, as the normalization range.
+
+ The default space is used for example in the sample method: if no
+ sampling limits are given, the default space is used.
+
+ If the observables are binned and the model is unbinned, the
+ model will be a binned model, by wrapping the model in a
+ :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
+ calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
+
+ The observables are not equal to the domain as it does not restrict or
+ truncate the model outside this range.
+
+pdf.init.norm: |1+
+ Normalization of the PDF.
+ By default, this is the same as the default space of the PDF.
+
+pdf.pdf.norm: |1+
+ Normalization of the function.
+ By default, this is the ``norm`` of the PDF (which by default is the same as
+ the space of the PDF).
+
+pdf.param.norm: |1+
+ Normalization of the function.
+ By default, this is the ``norm`` of the PDF (which by default is the same as
+ the space of the PDF). Should be ``ZfitSpace`` to define the space
+ to normalize over.
+
+pdf.param.x: |1+
+ Data to evaluate the method on. Should be ``ZfitData``
+ or a mapping of *obs* to numpy-like arrays.
+ If an array is given, the first dimension is interpreted as the events while
+ the second is meant to be the dimensionality of a single event.
+
+pdf.init.extended: |1+
+ The overall yield of the PDF.
+ If this is parameter-like, it will be used as the yield,
+ the expected number of events, and the PDF will be extended.
+ An extended PDF has additional functionality, such as the
+ ``ext_*`` methods and the ``counts`` (for binned PDFs).
+
+
+pdf.init.extended.auto: |1+
+ If ``True``,
+ the PDF will be extended automatically
+ using the total number of events in the histogram.
+ This is the default.
+
+pdf.integrate.limits: |1+
+ Limits of the integration.
+
+pdf.partial_integrate.limits: |1+
+ Limits of the integration that will be integrated out.
+ Has to be a subset of the PDF's observables.
+
+pdf.integrate.norm: |1+
+ Normalization of the integration.
+ By default, this is the same as the default space of the PDF.
+ ``False`` means no normalization and returns the unnormalized integral.
+
+pdf.integrate.options: |1+
+ Options for the integration.
+ Additional options for the integration. Currently supported options are:
+ - type: one of (``bins``)
+ This hints that bins are integrated. A method that is vectorizable,
+ non-dynamic and therefore less suitable for complicated functions is chosen.
+
+pdf.param.yield: |1+
+ Yield (expected number of events) of the PDF.
+ If this is parameter-like, it will be used as the yield,
+ the expected number of events, and the PDF will be extended.
+ An extended PDF has additional functionality, such as the
+ ``ext_*`` methods and the ``counts`` (for binned PDFs).
+
+pdf.sample.n: |1+
+ Number of samples to draw.
+ For an extended PDF, the argument is optional and will be the
+ poisson-fluctuated expected number of events, i.e. the yield.
+
+pdf.sample.limits: |1+
+ Limits of the sampling.
+ By default, this is the same as the default space of the PDF.
+
+pdf.binned.counts.x: |1+
+ Data for the binned PDF.
+ The returned counts correspond to the binned axis in ``x``.
+
+pdf.binned.counts.norm: |1+
+ Normalization of the counts.
+ This normalizes the counts so that the actual sum of all counts is
+ equal to the yield.
+
+pdf.polynomial.init.coeff0: |1+
+ Coefficient of the constant term.
+ This is the coefficient of the constant term, i.e. the term
+ :math:`x^0`. If None, set to 1.
+
+pdf.polynomial.init.coeffs: |1+
+ Coefficients of the sum of the polynomial.
+ The coefficients of the polynomial, starting with the first order
+ term. To set the constant term, use ``coeff0``.
+
+pdf.polynomial.init.apply_scaling: |1+
+ Rescale the data so that the actual limits represent (-1, 1).
+ This is usually wanted as the polynomial is defined in this range.
+ Default is ``True``.
+
+
+
+pdf.kde.bandwidth.weights: |1+
+ Weights of each event
+ in *data*, can be None or Tensor-like with shape compatible
+ with *data*. This will change the count of the events, whereas
+ weight :math:`w_i` of :math:`x_i` will scale the value of :math:`K_i(x_i)`,
+ resulting in a factor of :math:`\frac{w_i}{\sum w_i}`.
+
+
+pdf.kde.bandwidth.data: |1+
+ Data points to determine the bandwidth
+ from.
+
+pdf.kde.init.data: |1+
+ Data sample to approximate
+ the density from. The points represent positions of the *kernel*,
+ the :math:`x_i`. This is preferably a ``ZfitData``, but can also
+ be an array-like object.
+
+ If the data has weights, they will be taken into account.
+ This will change the count of the events, whereas
+ weight :math:`w_i` of :math:`x_i` will scale the value of
+ :math:`K_i(x_i)`, resulting in a factor of :math:`\frac{w_i}{\sum w_i}`.
+
+ If no weights are given, each kernel will be scaled by the same
+ constant :math:`\frac{1}{n_{data}}`.
+
+
+pdf.kde.init.obs: |1+
+ Observable space of the KDE.
+ As with any other PDF, this will be used as the default *norm*, but
+ does not define the domain of the PDF. Namely, this can be a smaller
+ space than *data*, as long as the names of the observables match.
+ Using a larger dataset is actually good practice to avoid
+ boundary biases, see also :ref:`sec-boundary-bias-and-padding`.
+
+pdf.kde.init.bandwidth: |1+
+ Bandwidth of the kernel,
+ often also denoted as :math:`h`. For a Gaussian kernel, this
+ corresponds to *sigma*. This can be calculated using
+ pre-defined options or by specifying a numerical value that is
+ broadcastable to *data* -- a scalar or an array-like
+ object with the same size as *data*.
+
+ A scalar value is usually referred to as a global bandwidth while
+ an array holds local bandwidths
+
+pdf.kde.init.kernel: |1+
+ The kernel is the heart
+ of the Kernel Density Estimation, which consists of the sum of
+ kernels around each sample point. Therefore, a kernel should represent
+ the distribution probability of a single data point as close as
+ possible.
+
+ The most widespread kernel is a Gaussian, or Normal, distribution. Due
+ to the central limit theorem, the sum of many (arbitrary) random variables
+ -- the case for most real-world observables, as they are the result of
+ multiple consecutive random effects -- results in a Gaussian distribution.
+ However, there are many cases where this assumption is not per se true. In
+ these cases, an alternative kernel may offer a better choice.
+
+ Valid choices are callables that return a
+ :py:class:`~tensorflow_probability.distributions.Distribution`, such as all distributions
+ that belong to the loc-scale family.
+
+pdf.kde.init.padding: |1+
+ KDEs have a peculiar
+ weakness: the boundaries, as the outside has a zero density. This makes the KDE
+ go down at the boundary as well, as the density approaches zero, no matter what the
+ density inside the boundary was.
+
+ There are two ways to circumvent this problem:
+
+ - the best solution: providing a larger dataset than the default space the PDF is used in
+ - mirroring the existing data at the boundaries, which is equivalent to a boundary condition
+ with a zero derivative. This is a padding technique and can improve the boundaries.
+ However, one important drawback of this method is to keep in mind that this will actually
+ alter the PDF *to look mirrored*. If the PDF is plotted in a larger range, this becomes
+ clear.
+
+ Possible options are a number (default 0.1) that depicts the fraction of the overall space
+ that defines the data mirrored on both sides. For example, for a space from 0 to 5, a value of
+ 0.3 means that all data in the region of 0 to 1.5 is taken, mirrored around 0 as well as
+ all data from 3.5 to 5 and mirrored at 5. The new data will go from -1.5 to 6.5, so the
+ KDE is also having a shape outside the desired range. Using it only for the range 0 to 5
+ hides this.
+ Using a dict, each side separately (or only a single one) can be mirrored, like ``{'lowermirror': 0.1}``
+ or ``{'lowermirror': 0.2, 'uppermirror': 0.1}``.
+ For more control, a callable that takes data and weights can also be used.
+
+
+pdf.kde.init.weights: |1+
+ Weights of each event
+ in *data*, can be None or Tensor-like with shape compatible
+ with *data*. Instead of using this parameter, it is preferred
+ to use a ``ZfitData`` as *data* that contains weights.
+ This will change the count of the events, whereas
+ weight :math:`w_i` of :math:`x_i` will scale the value of :math:`K_i(x_i)`,
+ resulting in a factor of :math:`\frac{w_i}{\sum w_i}`.
+
+ If no weights are given, each kernel will be scaled by the same
+ constant :math:`\frac{1}{n_{data}}`.
+
+pdf.kde.init.num_grid_points: |1+
+ Number of points in
+ the binning grid.
+
+ The data will be binned using the *binning_method* in *num_grid_points*
+ and this histogram grid will then be used as kernel points. This has the
+ advantage to have a constant computational complexity independent of the data
+ size.
+
+ A number from 32 on can already yield good results, while the default is set
+ to 1024, creating a fine grid. Lowering the number increases the performance
+ at the cost of accuracy.
+
+pdf.kde.init.binning_method: |1+
+ Method to be used for
+ binning the data. Options are 'linear', 'simple'.
+
+ The data can be binned in the usual way ('simple'), but this is less precise
+ for KDEs, where we are interested in the shape of the histogram and smoothing
+ it. Therefore, a better suited method, 'linear', is available.
+
+ In normal binning, each event (or weight) falls into the bin within the bin edges,
+ while the neighbouring bins get zero counts from this event.
+ In linear binning, the event is split between two bins, proportional to its
+ closeness to each bin.
+
+ The 'linear' method provides superior performance, most notably in small (~32)
+ grids.
+
+pdf.kde.bandwidth.explain_global: |1+
+ A global bandwidth
+ is a single parameter that is shared amongst all kernels.
+ While this is a fast and robust method,
+ it is a rule of thumb approximation. Due to its global nature,
+ it cannot take into account the different varying
+ local densities. It uses notably the least amount of memory
+ of all methods.
+
+pdf.kde.bandwidth.explain_local: |1+
+ A local bandwidth
+ means that each kernel :math:`i` has a different bandwidth.
+ In other words, given some data points with size n,
+ we will need n bandwidth parameters.
+ This is often more accurate than a global bandwidth,
+ as it allows larger bandwidths in areas of lower density,
+ where, due to the small local sample size, we have less certainty
+ over the true density while having a smaller bandwidth in denser
+ populated areas.
+
+ The accuracy comes at the cost of a longer pre-calculation to obtain
+ the local bandwidth (there are different methods available), an
+ increased runtime and, most importantly, a higher peak memory usage.
+
+ This can be especially costly for a large number (> few thousand) of
+ kernels and/or evaluating on large datasets (> 10'000).
+
+pdf.kde.bandwidth.explain_adaptive: |1+
+ Adaptive bandwidths are
+ a way to reduce the dependence on the bandwidth parameter
+ and are usually local bandwidths that take into account
+ the local densities.
+ Adaptive bandwidths are constructed by using an initial estimate
+ of the local density in order to calculate a sensible bandwidth
+ for each kernel. The initial estimator can be a kernel density
+ estimation using a global bandwidth with a rule of thumb.
+ The adaptive bandwidth h is obtained using this estimate, where
+ usually
+
+ .. math::
+
+ h_{i} \propto f( x_{i} ) ^ {-1/2}
+
+ Estimates can still differ in the overall scaling of this
+ bandwidth.
+
+minimizer.verbosity: |1+
+ Verbosity of the minimizer. Has to be between 0 and 10.
+ The verbosity has the meaning:
+
+ - a value of 0 means quiet and no output
+ - above 0 up to 5, information that is good to know but without
+ flooding the user, corresponding to an "INFO" level.
+ - A value above 5 starts printing out considerably more and
+ is used more for debugging purposes.
+ - Setting the verbosity to 10 will print out every
+ evaluation of the loss function and gradient.
+
+ Some minimizers offer additional output which is also
+ distributed as above but may duplicate certain printed values.
+
+minimizer.tol: |1+
+ Termination value for the
+ convergence/stopping criterion of the algorithm
+ in order to determine if the minimum has
+ been found. Defaults to 1e-3.
+minimizer.criterion: |1+
+ Criterion of the minimum. This is an
+ estimated measure for the distance to the
+ minimum and can include the relative
+ or absolute changes of the parameters,
+ function value, gradients and more.
+ If the value of the criterion is smaller
+ than ``loss.errordef * tol``, the algorithm
+ stops and it is assumed that the minimum
+ has been found.
+minimizer.strategy: |1+
+ A class of type ``ZfitStrategy`` that takes no
+ input arguments in the init. Determines the behavior of the minimizer in
+ certain situations, most notably when encountering
+ NaNs. It can also implement a callback function.
+minimizer.maxiter: |1+
+ Approximate number of iterations.
+ This corresponds to roughly the maximum number of
+ evaluations of the ``value``, ``gradient`` or ``hessian``.
+minimizer.name: |1+
+ Human-readable name of the minimizer.
+minimizer.maxcor: |1+
+ Maximum number of memory history to keep
+ when using a quasi-Newton update formula such as BFGS.
+ It is the number of gradients
+ to “remember” from previous optimization
+ steps: increasing it increases
+ the memory requirements but may speed up the convergence.
+minimizer.init.maxls: |1+
+ Maximum number of linesearch points.
+
+minimizer.scipy.gradient: |1+
+ Define the method to use for the gradient computation
+ that the minimizer should use. This can be the
+ gradient provided by the loss itself or a
+ method from the minimizer.
+ In general, using the zfit provided automatic gradient is
+ more precise and needs less computation time for the
+ evaluation compared to a numerical method, but it may not always be
+ possible. In this case, zfit switches to a generic, numerical gradient
+ which in general performs worse than if the minimizer has its own
+ numerical gradient.
+ The following are possible choices:
+
+ If set to ``False`` or ``'zfit'`` (or ``None``; default), the
+ gradient of the loss (usually the automatic gradient) will be used;
+ the minimizer won't use an internal algorithm.
+
+
+minimizer.scipy.gradient.internal: |1+
+ ``True`` tells the minimizer to use its default internal
+ gradient estimation. This can be specified more clearly using the
+ arguments ``'2-point'`` and ``'3-point'``, which specify the
+ numerical algorithm the minimizer should use in order to
+ estimate the gradient.
+minimizer.scipy.hessian: |1+
+ Define the method to use for the hessian computation
+ that the minimizer should use. This can be the
+ hessian provided by the loss itself or a
+ method from the minimizer.
+
+ While the exact gradient can speed up the convergence and is
+ often beneficial, this is not true for the computation of the
+ (inverse) Hessian matrix.
+ Due to the :math:`n^2` number of entries (compared to :math:`n` in the
+ gradient) from the :math:`n` parameters, this can grow quite
+ large and become computationally expensive.
+
+ Therefore, many algorithms use an approximated (inverse)
+ Hessian matrix making use of the gradient updates instead
+ of calculating the exact matrix. This turns out to be
+ precise enough and usually considerably speeds up the
+ convergence.
+
+ The following are possible choices:
+
+ If set to ``False`` or ``'zfit'``, the
+ hessian defined in the loss (usually using automatic differentiation)
+ will be used;
+ the minimizer won't use an internal algorithm.
+minimizer.scipy.hessian.internal: |1+
+ A :class:`~scipy.optimize.HessianUpdateStrategy` that holds
+ an approximation of the hessian. For example
+ :class:`~scipy.optimize.BFGS` (which performs usually best)
+ or :class:`~scipy.optimize.SR1`
+ (sometimes unstable updates).
+ ``True`` (or ``None``; default) tells the minimizer
+ to use its default internal
+ hessian approximation.
+ Arguments ``'2-point'`` and ``'3-point'`` specify which
+ numerical algorithm the minimizer should use in order to
+ estimate the hessian. This is only possible if the
+ gradient is provided by zfit and not an internal numerical
+ method is already used to determine it.
+
+minimizer.scipy.info: |1+
+ This implementation wraps the minimizers in
+ `SciPy optimize `_.
+minimizer.trust.eta: |1+
+ Trust region related acceptance
+ stringency for proposed steps.
+minimizer.trust.init_trust_radius: |1+
+ Initial trust-region radius.
+minimizer.trust.max_trust_radius: |1+
+ Maximum value of the trust-region radius.
+ No steps that are longer than this value will be proposed.
+
+minimizer.nlopt.population: |1+
+ The population size for the evolutionary algorithm.
+
+minimizer.nlopt.info: |1+
+ More information on the algorithm can be found
+ `here `_.
+
+ This implementation uses internally the
+ `NLopt library `_.
+ It is a
+ free/open-source library for nonlinear optimization,
+ providing a common interface for a number of
+ different free optimization routines available online as well as
+ original implementations of various other algorithms.
+
+loss.binned.init.model: |1+
+ Binned PDF(s) that return the normalized probability
+ (``rel_counts`` or ``counts``) for
+ *data* under the given parameters.
+ If multiple models and datasets are given, they will be used
+ in the same order to do a simultaneous fit.
+
+loss.binned.init.data: |1+
+ Binned dataset that will be given to the *model*.
+ If multiple models and datasets are given, they will be used
+ in the same order to do a simultaneous fit.
+loss.init.model: |1+
+ PDFs that return the normalized probability for
+ *data* under the given parameters.
+ If multiple models and datasets are given, they will be used
+ in the same order to do a simultaneous fit.
+
+loss.init.data: |1+
+ Dataset that will be given to the *model*.
+ If multiple models and datasets are given, they will be used
+ in the same order to do a simultaneous fit.
+ If the data is not a ``ZfitData`` object, i.e. if it doesn't have a space
+ attached, it has to be within the limits of the model; otherwise, an
+ :py:class:`~zfit.exception.IntentionAmbiguousError` will be raised.
+
+loss.init.constraints: |1+
+ Auxiliary measurements ("constraints")
+ that add a likelihood term to the loss.
+
+ .. math::
+ \mathcal{L}(\theta) = \mathcal{L}_{unconstrained} \prod_{i} f_{constr_i}(\theta)
+
+ Usually, an auxiliary measurement -- by its very nature -- should only be added once
+ to the loss. zfit does not automatically deduplicate constraints if they are given
+ multiple times, leaving the freedom for arbitrary constructs.
+
+ Constraints can also be used to restrict the loss by adding any kinds of penalties.
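+
+ A minimal sketch of adding a Gaussian constraint to a loss, with
+ ``pdf`` and ``data`` created beforehand (argument names may differ
+ between zfit versions):
+
+ .. code-block:: python
+
+     import zfit
+
+     mu = zfit.Parameter("mu", 1.2)
+     constraint = zfit.constraint.GaussianConstraint(
+         params=mu, observation=1.0, uncertainty=0.1
+     )
+     nll = zfit.loss.UnbinnedNLL(model=pdf, data=data, constraints=constraint)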
+
+loss.init.explain.unbinnednll: |1+
+ The unbinned likelihood can be written as
+
+ .. math::
+ \mathcal{L}_{non-extended}(x | \theta) = \prod_{i} f_{\theta} (x_i)
+
+ where :math:`x_i` is a single event from the dataset *data* and :math:`f_{\theta}` is the *model*.
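+
+ A minimal sketch of building such a loss (limits and values are
+ illustrative):
+
+ .. code-block:: python
+
+     import numpy as np
+     import zfit
+
+     obs = zfit.Space("x", limits=(-5, 5))
+     mu = zfit.Parameter("mu", 0.0)
+     sigma = zfit.Parameter("sigma", 1.0)
+     gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
+     data = zfit.Data.from_numpy(obs=obs, array=np.random.normal(0.0, 1.0, 1000))
+     nll = zfit.loss.UnbinnedNLL(model=gauss, data=data)
+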
+loss.init.explain.extendedterm: |1+
+ The extended likelihood has an additional term
+
+ .. math::
+ \mathcal{L}_{extended term} = poiss(N_{tot}, N_{data})
+ = N_{data}^{N_{tot}} \frac{e^{- N_{data}}}{N_{tot}!}
+
+ and the extended likelihood is the product of both.
+loss.init.explain.simultaneous: |1+
+ A simultaneous fit can be performed by giving one or more ``model`` and ``data`` to the loss. The
+ length of each has to match the length of the others.
+
+ .. math::
+ \mathcal{L}_{simultaneous}(\theta | \{data_0, data_1, ..., data_n\})
+ = \prod_{i} \mathcal{L}(\theta_i, data_i)
+
+ where :math:`\theta_i` is a set of parameters and
+ a subset of :math:`\theta`.
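+
+ A sketch of a simultaneous fit, assuming two PDFs and two datasets
+ built as in the example above:
+
+ .. code-block:: python
+
+     nll = zfit.loss.UnbinnedNLL(model=[pdf1, pdf2], data=[data1, data2])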
+
+
+loss.init.explain.negativelog: |1+
+ For optimization purposes, it is often easier to minimize a function
+ and to use a log transformation, which turns the product into a sum. The actual loss is given by
+
+ .. math::
+ \mathcal{L} = - \sum_{i}^{n} \ln(f(\theta|x_i))
+
+ and is therefore called "negative log ..."
+
+loss.init.explain.spdtransform: |1+
+ A scaled Poisson distribution is
+ used as described by Bohm and Zech, NIMA 748 (2014) 1-6 if the variance
+ of the data is not ``None``. The scaling is forced to be >= 1 in order
+ to avoid issues with empty bins.
+
+loss.init.explain.weightednll: |1+
+ If the dataset has weights, a weighted likelihood will be constructed instead
+
+ .. math::
+ \mathcal{L} = - \sum_{i}^{n} w_i \cdot \ln(f(\theta|x_i))
+
+ Note that this is not a real likelihood anymore! Calculating uncertainties
+ can be done with hesse (as it has a correction) but will yield wrong
+ results with profiling methods. The minimum is however fully valid.
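+
+ A sketch of creating weighted data (the ``values`` and ``weights``
+ arrays are illustrative):
+
+ .. code-block:: python
+
+     data = zfit.Data.from_numpy(obs=obs, array=values, weights=weights)
+     nll = zfit.loss.UnbinnedNLL(model=pdf, data=data)  # weighted likelihood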
+
+loss.init.binned.explain.chi2zeros: |1+
+ If the dataset has empty bins, the errors
+ will be zero and :math:`\chi^2` is undefined. Two possibilities are available and
+ can be given as an option:
+
+ - "empty": "ignore" will ignore all bins with zero entries and won't count to the loss
+ - "errors": "expected" will use the expected counts from the model
+ with a Poissonian uncertainty
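+
+ A sketch of passing these options, assuming a binned loss such as
+ ``BinnedChi2``:
+
+ .. code-block:: python
+
+     loss = zfit.loss.BinnedChi2(
+         model=binned_pdf, data=binned_data, options={"empty": "ignore"}
+     )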
+
+loss.init.options: |1+
+ Additional options (as a dict) for the loss.
+ Current possibilities include:
+
+ - 'subtr_const' (default True): subtract from each point's
+ log probability density a constant that
+ is approximately equal to the average log probability
+ density in the very first evaluation before
+ the summation. This brings the initial loss value closer to 0 and increases,
+ especially for large datasets, the numerical stability.
+
+ The value will be stored with 'subtr_const_value' and can also be given
+ directly.
+
+ The subtraction should not affect the minimum as the absolute
+ value of the NLL is meaningless. However,
+ with this switch on, one cannot directly compare
+ different likelihoods' absolute values as the constant
+ may differ! Use ``create_new`` in order to have a comparable likelihood
+ between different losses or use the ``full`` argument in the value function
+ to calculate the full loss with all constants.
+
+
+ These settings may extend over time. In order to make sure that a loss is the
+ same under the same data, make sure to use ``create_new`` instead of instantiating
+ a new loss as the former will automatically carry over any relevant constants
+ and behavior.
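+
+ A sketch of both mechanisms (availability may depend on the zfit
+ version):
+
+ .. code-block:: python
+
+     nll = zfit.loss.UnbinnedNLL(model=pdf, data=data, options={"subtr_const": True})
+     nll2 = nll.create_new(data=data2)  # takes over constants, comparable to nll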
+
+loss.args.params: |1+
+ Mapping of the parameter names to the actual
+ values. The parameter names refer to the names of the parameters,
+ typically :py:class:`~zfit.Parameter`, as returned by
+ ``get_params()``. If no params are given, the current default
+ values of the parameters are used.
+
+loss.value.full: |1+
+ If True, return the full loss value, otherwise
+ allow for the removal of constants and only return
+ the part that depends on the parameters. Constants
+ don't matter for the task of optimization, but
+ they can greatly help with the numerical stability of the loss function.
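+
+ For example, assuming ``value`` accepts ``full`` in your zfit version:
+
+ .. code-block:: python
+
+     shifted = nll.value()            # constants may be subtracted
+     complete = nll.value(full=True)  # full value, comparable between losses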
+
+loss.args.numgrad: |1+
+ If ``True``, calculate the numerical gradient/Hessian
+ instead of using the automatic one. This is
+ usually slower if called repeatedly but can
+ be used if the automatic gradient fails (e.g. if
+ the model is not differentiable, not written using ``znp.*``, etc.).
+ Default will fall back to what the loss is set to.
+
+result.init.loss: |1+
+ The loss function that was minimized.
+ Usually, but not necessarily, it also contains
+ the pdf, data, and constraints.
+result.init.params: |1+
+ Result of the fit where each
+ :py:class:`~zfit.Parameter` key has the
+ value from the minimum found by the minimizer.
+
+result.init.minimizer: |1+
+ Minimizer that was used to obtain this ``FitResult`` and will be used to
+ calculate certain errors. If the minimizer
+ is state-based (like "iminuit"), then this is a copy
+ and the state of other ``FitResults`` or of the *actual*
+ minimizer that performed the minimization
+ won't be altered.
+result.init.valid: |1+
+ Indicating whether the result is valid or not. This is the strongest
+ indication and serves as
+ the global flag. The reasons why a result may be
+ invalid can be arbitrary, including but not limited to:
+
+ - parameter(s) at the limit
+ - maxiter reached without proper convergence
+ - the minimizer may even have converged, but it is known
+ that this is only a local minimum
+
+ To indicate the reason for the invalidity, pass a message.
+result.init.edm: |1+
+ The estimated distance to minimum
+ which is the criterion value at the minimum.
+result.init.fmin: |1+
+ Value of the function at the minimum.
+result.init.criterion: |1+
+ Criterion that was used during the minimization.
+ This determines the estimated distance to the
+ minimum (edm).
+result.init.status: |1+
+ A status code (if available) that describes
+ the minimization termination. 0 means a valid
+ termination.
+result.init.converged: |1+
+ Whether the fit has successfully converged or not.
+ The result itself can still be an invalid minimum
+ such as if the parameters are at or close
+ to the limits or in case another minimum is found.
+result.init.message: |1+
+ Human-readable message to indicate the reason
+ if the fitresult is not valid.
+ If the fit is valid, the message should
+ be an empty string (or None);
+ otherwise, it should denote the reason for the invalidity.
+result.init.info: |1+
+ Additional information (if available)
+ such as *number of gradient function calls* or the
+ original minimizer return message.
+ This is a relatively free field and *no single field*
+ in it is guaranteed to be stable.
+ Some recommended fields:
+
+ - *original*: contains the original returned object
+ by the minimizer used internally.
+ - *optimizer*: the actual instance of the wrapped
+ optimizer (if available)
+result.init.approx: |1+
+ Collection of approximations found during
+ the minimization process such as gradient and hessian.
+result.init.niter: |1+
+ Approximate number of iterations ~= number
+ of function evaluations ~= number of gradient evaluations.
+ This is an approximated value and the exact meaning
+ can differ between different minimizers.
+result.init.evaluator: |1+
+ Loss evaluator that was used during the
+ minimization and that may contain information
+ about the last evaluations of the gradient
+ etc. which can serve as approximations.
+
+result.init.values: |1+
+ Values of the parameters at the
+ found minimum.
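+
+ A sketch of inspecting a ``FitResult`` after a minimization:
+
+ .. code-block:: python
+
+     result = minimizer.minimize(nll)
+     print(result.valid, result.converged, result.fmin)
+     for param, info in result.params.items():
+         print(param.name, info["value"])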
+
+hs3.ini.reuse_params: |1+
+ If parameters are given, they will be reused:
+ a given parameter will be used as the parameter
+ with the same name. If a parameter is not given, a new
+ parameter will be created.
+
+hs3.explain: |1+
+ The `HEP Statistics Serialization Standard `_,
+ or in short, :math:`\text{HS}^3`, is a serialization format for statistical models.
+ It is a JSON/YAML-based serialization that is a
+ coordinated effort of the HEP community to standardize the serialization of statistical models. The standard
+ is still in development and is not yet finalized. This function is experimental and may change in the future.
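+
+ A minimal sketch, assuming the experimental ``zfit.hs3`` helpers
+ (the API may change):
+
+ .. code-block:: python
+
+     import zfit
+
+     serialized = zfit.hs3.dumps(pdf)     # serialize a model
+     loaded = zfit.hs3.loads(serialized)  # load the objects back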
+
+hs3.layout.explain: |1+
+ The keys in the HS3 format are:
+
+ - 'distributions': list of PDFs
+ - 'variables': list of variables, i.e. ``zfit.Space`` and ``zfit.Parameter`` (or more generally parameters)
+ - 'loss': list of losses
+ - 'data': list of data
+ - 'metadata': contains the version of the HS3 format and the
+ zfit version used to create the file
diff --git a/utils/api/replace_argdocs.py b/utils/api/replace_argdocs.py
new file mode 100755
index 0000000..2f9db48
--- /dev/null
+++ b/utils/api/replace_argdocs.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# Copyright (c) 2024 zfit
+from __future__ import annotations
+
+import argparse
+import os
+import re
+from pathlib import Path
+
+import yaml
+
+here = Path(os.path.realpath(__file__)).parent
+
+parser = argparse.ArgumentParser(
+    description="Replace arguments with centrally stored ones",
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+)
+
+parser.add_argument("files", nargs="*", help="Files to be processed.")
+
+parser.add_argument("--dry", action="store_true", help="Dry run WITHOUT replacing.")
+
+cfg = parser.parse_args()
+
+with Path(here / "argdocs.yaml").open() as replfile:
+    replacements = yaml.load(replfile, Loader=yaml.Loader)
+
+# Replace each |@doc:...| ... |@docend:...| span with the centrally stored text
+for filepath in cfg.files:
+    if not filepath.endswith(".py"):
+        continue
+    with Path(filepath).open() as file:
+        filedata = file.read()
+
+    infile = False
+    needs_replacement = False
+    for param, replacement in replacements.items():
+        # strip trailing newlines and leading whitespace from the stored doc
+        replacement = replacement.rstrip("\n").lstrip(" ")
+        auto_start = rf"|@doc:{param}|"
+        auto_end = rf"|@docend:{param}|"
+        # find all non-greedy start...end spans, also across line breaks
+        matches = re.findall(
+            auto_start.replace("|", r"\|") + r".*?" + auto_end.replace("|", r"\|"),
+            filedata,
+            re.DOTALL,
+        )
+
+        if not matches:
+            continue
+        infile = True
+
+        replacement_mod = f"{auto_start} {replacement} {auto_end}"
+
+        for match in matches:
+            if auto_start in match[len(auto_start) :]:  # sanity check: no nested start marker
+                msg = f"Docstring formatting error, has more than one start until an end command: {match}"
+                raise ValueError(msg)
+            if match != replacement_mod:
+                needs_replacement = True
+                filedata = filedata.replace(match, replacement_mod)
+
+    # Write the file out again; in a dry run, only report what would change
+    replace_msg = "replaced docs" if needs_replacement else "docs already there"
+    filename = filepath.split("/")[-1]
+    if infile:
+        if cfg.dry:
+            print(f"Dry run: {replace_msg} in {filename}")
+        elif needs_replacement:
+            with Path(filepath).open("w") as file:
+                file.write(filedata)