add a basic test for observation I/O, and some more docs for dataformats.
bd-j · Apr 28, 2024 · ad63e99 · ad63e99
1 parent 5829177
commit ad63e99
Show file tree

Hide file tree

Showing 4 changed files with 100 additions and 15 deletions.
diff --git a/doc/dataformat.rst b/doc/dataformat.rst
@@ -13,8 +13,9 @@ uncertainties thereon, they tell prospector what data to predict, contain
 dataset-specific information for how to predict that data, and can even store
 methods for computing likelihoods in the case of complicated, dataset-specific
 noise models. There are two fundamental kinds of data, `Photometry` and
-`Spectrum` that are each subclasses of `Observation`.  They have the following
-attributes, most of which can be also accessed as dictionary keys.
+`Spectrum` that are each subclasses of `Observation`. There is also a
+`Lines` class for integrated emission line fluxes. They have the following
+attributes, most of which can also be accessed as dictionary keys.
 
 
 - ``wavelength``
@@ -30,7 +31,9 @@ attributes, most of which can be also accessed as dictionary keys.
     apparent magnitude. That is, 1 maggie is the flux density in Janskys divided
     by 3631. If absolute spectrophotometry is available, the units for a
     `Spectrum` should also be maggies, otherwise photometry must be present and
-    a calibration vector must be supplied or fit.
+    a calibration vector must be supplied or fit.  Note that for convenience
+    there is a `maggies_to_nJy` attribute of `Observation` that gives the
+    conversion factor.
 
 - ``uncertainty``
     The uncertainty vector (sigma), in same units as ``flux``, ndarray of same
@@ -44,6 +47,9 @@ attributes, most of which can be also accessed as dictionary keys.
    For a `Photometry`, this is a list of strings corresponding to filter names
    in `sedpy <https://github.com/bd-j/sedpy>`_
 
+- ``line_ind``
+  For a `Lines` instance, the (zero-based) index of the emission line in the
+  FSPS emission line table.
 
 In addition to these attributes, several additional aspects of an observation
 are used to help predict data or to compute likelihoods.  The latter is
@@ -73,8 +79,7 @@ For a single observation, you might do something like:
 
         def build_obs(N):
             from prospect.data import Spectrum
-            # dummy observation dictionary with just a spectrum
-            N = 1000
+            N = 1000  # number of wavelength points
             spec = Spectrum(wavelength=np.linspace(3000, 5000, N), flux=np.zeros(N), uncertainty=np.ones(N))
             # ensure that this is a valid observation for fitting
             spec = spec.rectify()
@@ -84,6 +89,20 @@ For a single observation, you might do something like:
 
 Note that `build_obs` returns a *list* even if there is only one dataset.
 
+For photometry this might look like:
+
+.. code-block:: python
+
+        def build_obs(N):
+            from prospect.data import Photometry
+            # valid sedpy filter names
+            fnames = list([f"sdss_{b}0" for b in "ugriz"])
+            Nf = len(fnames)
+            phot = Photometry(filters=fnames, flux=np.ones(Nf), uncertainty=np.ones(Nf)/10)
+            # ensure that this is a valid observation for fitting
+            phot = phot.rectify()
+            observations = [phot]
+            return observations
 
 Converting from old style obs dictionaries
 ------------------------------------------

diff --git a/prospect/io/write_results.py b/prospect/io/write_results.py
@@ -106,7 +106,7 @@ def write_hdf5(hfile, run_params, model, obs,
         generate and store
     """
     # If ``hfile`` is not a file object, assume it is a filename and open
-    if type(hfile) is str:
+    if isinstance(hfile, str):
         hf = h5py.File(hfile, "w")
     else:
         hf = hfile
@@ -122,6 +122,11 @@ def write_hdf5(hfile, run_params, model, obs,
     write_sampling_h5(hf, chain, extras)
     hf.flush()
 
+    # ----------------------
+    # Observational data
+    write_obs_to_h5(hf, obs)
+    hf.flush()
+
     # ----------------------
     # High level parameter and version info
     meta = metadata(run_params, model, write_model_params=write_model_params)
@@ -137,11 +142,6 @@ def write_hdf5(hfile, run_params, model, obs,
         mgroup = hf.create_group('optimization')
         mdat = mgroup.create_dataset('optimizer_results', data=out)
 
-    # ----------------------
-    # Observational data
-    write_obs_to_h5(hf, obs)
-    hf.flush()
-
     # ---------------
     # Best fitting model in space of data
     if sps is not None:

diff --git a/prospect/observation/observation.py b/prospect/observation/observation.py
@@ -10,7 +10,7 @@
 from ..likelihood.noise_model import NoiseModel
 
 
-__all__ = ["Observation", "Spectrum", "Photometry", "Lines"
+__all__ = ["Observation", "Spectrum", "Photometry", "Lines",
            "from_oldstyle", "from_serial", "obstypes"]
 
 
@@ -125,6 +125,11 @@ def _automask(self):
     def render(self, wavelength, spectrum):
         raise(NotImplementedError)
 
+    @property
+    def kind(self):
+        # make 'kind' private
+        return self._kind
+
     @property
     def ndof(self):
         # TODO: cache this?
@@ -210,7 +215,7 @@ def maggies_to_nJy(self):
 
 class Photometry(Observation):
 
-    kind = "photometry"
+    _kind = "photometry"
     alias = dict(maggies="flux",
                  maggies_unc="uncertainty",
                  filters="filters",
@@ -273,7 +278,7 @@ def to_oldstyle(self):
 
 class Spectrum(Observation):
 
-    kind = "spectrum"
+    _kind = "spectrum"
     alias = dict(spectrum="flux",
                  unc="uncertainty",
                  wavelength="wavelength",
@@ -442,7 +447,7 @@ def _smooth_lsf_fft(self, inwave, influx, outwave, sigma):
 
 class Lines(Spectrum):
 
-    kind = "lines"
+    _kind = "lines"
     alias = dict(spectrum="flux",
                  unc="uncertainty",
                  wavelength="wavelength",

diff --git a/tests/test_io.py b/tests/test_io.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pytest
+
+import h5py
+
+from prospect.models import SpecModel, templates
+from prospect.sources import CSPSpecBasis
+from prospect.observation import Photometry, Spectrum
+from prospect.io.write_results import write_obs_to_h5
+from prospect.io.read_results import obs_from_h5
+
+
+@pytest.fixture(scope="module")
+def build_sps():
+    sps = CSPSpecBasis(zcontinuous=1)
+    return sps
+
+
+def build_model(add_neb=False):
+    model_params = templates.TemplateLibrary["parametric_sfh"]
+    if add_neb:
+        model_params.update(templates.TemplateLibrary["nebular"])
+    return SpecModel(model_params)
+
+
+def build_obs(multispec=True):
+    N = 1500 * (2 - multispec)
+    wmax = 7000
+    wsplit = wmax - N * multispec
+
+    fnames = list([f"sdss_{b}0" for b in "ugriz"])
+    Nf = len(fnames)
+    phot = [Photometry(filters=fnames, flux=np.ones(Nf), uncertainty=np.ones(Nf)/10)]
+    spec = [Spectrum(wavelength=np.linspace(4000, wsplit, N),
+                     flux=np.ones(N), uncertainty=np.ones(N) / 10,
+                     mask=slice(None))]
+
+    if multispec:
+        spec += [Spectrum(wavelength=np.linspace(wsplit+1, wmax, N),
+                          flux=np.ones(N), uncertainty=np.ones(N) / 10,
+                          mask=slice(None))]
+
+    obslist = spec + phot
+    [obs.rectify() for obs in obslist]
+    return obslist
+
+
+def test_observation_io(build_sps, plot=False):
+    sps = build_sps
+
+    obslist = build_obs(multispec=True)
+    model = build_model(add_neb=True)
+
+    # obs writing
+    with h5py.File("test.h5", "w") as hf:
+        write_obs_to_h5(hf, obslist)
+    with h5py.File("test.h5", "r") as hf:
+        obsr = obs_from_h5(hf["observations"])