Skip to content

Commit

Permalink
Merge pull request #1366 from jaladh-singhal/hdf-scalars-fix
Browse files Browse the repository at this point in the history
Fix duplicate appending of scalars in HDF writer and add an overwrite option
  • Loading branch information
andrewfullard authored Dec 4, 2020
2 parents 5ae8d7c + 077cfe7 commit 1249c4e
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 69 deletions.
14 changes: 7 additions & 7 deletions tardis/io/tests/test_HDFWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self, property):
def test_simple_write(tmpdir, attr):
    """Round-trip a scalar attribute through the HDF writer.

    Scalars are stored in a Series under ``<path>/<snake_name>/scalars``.
    """
    fname = str(tmpdir.mkdir("data").join("test.hdf"))
    actual = MockHDF(attr)
    # overwrite=True: the writer now refuses to clobber an existing file
    # unless explicitly told to.
    actual.to_hdf(fname, path="test", overwrite=True)
    expected = pd.read_hdf(fname, key="/test/mock_hdf/scalars")["property"]
    assert actual.property == expected

Expand All @@ -59,7 +59,7 @@ def test_simple_write(tmpdir, attr):
def test_complex_obj_write(tmpdir, attr):
    """Round-trip an array/series-valued attribute through the HDF writer."""
    fname = str(tmpdir.mkdir("data").join("test.hdf"))
    actual = MockHDF(attr)
    actual.to_hdf(fname, path="test", overwrite=True)
    expected = pd.read_hdf(fname, key="/test/mock_hdf/property").values
    assert_array_almost_equal(actual.property, expected)

Expand All @@ -76,7 +76,7 @@ def test_complex_obj_write(tmpdir, attr):
def test_MultiIndex_write(tmpdir):
    """Round-trip a pandas MultiIndex through the HDF writer."""
    fname = str(tmpdir.mkdir("data").join("test.hdf"))
    actual = MockHDF(mock_multiIndex)
    actual.to_hdf(fname, path="test", overwrite=True)
    expected = pd.read_hdf(fname, key="/test/mock_hdf/property")
    # The index is persisted in unstacked form; rebuild the MultiIndex
    # before comparing against the original.
    expected = pd.MultiIndex.from_tuples(expected.unstack().values)
    pdt.assert_almost_equal(actual.property, expected)
def test_quantity_objects_write(tmpdir, attr):
    """Round-trip an array-valued astropy Quantity through the HDF writer.

    Values are compared via ``.cgs.value``, i.e. the writer stores the
    quantity's numeric value in CGS units.
    """
    fname = str(tmpdir.mkdir("data").join("test.hdf"))
    attr_quantity = u.Quantity(attr, "g/cm**3")
    actual = MockHDF(attr_quantity)
    actual.to_hdf(fname, path="test", overwrite=True)
    expected = pd.read_hdf(fname, key="/test/mock_hdf/property")
    assert_array_almost_equal(actual.property.cgs.value, expected)

def test_scalar_quantity_objects_write(tmpdir, attr):
    """Round-trip a scalar astropy Quantity through the HDF writer.

    Scalar quantities land under ``.../scalars`` and are compared via
    ``.cgs.value``.
    """
    fname = str(tmpdir.mkdir("data").join("test.hdf"))
    attr_quantity = u.Quantity(attr, "g/cm**3")
    actual = MockHDF(attr_quantity)
    actual.to_hdf(fname, path="test", overwrite=True)
    expected = pd.read_hdf(fname, key="/test/mock_hdf/scalars/")["property"]
    assert_array_almost_equal(actual.property.cgs.value, expected)


def test_none_write(tmpdir):
fname = str(tmpdir.mkdir("data").join("test.hdf"))
actual = MockHDF(None)
actual.to_hdf(fname, path="test")
actual.to_hdf(fname, path="test", overwrite=True)
expected = pd.read_hdf(fname, key="/test/mock_hdf/scalars/")["property"]
if expected == "none":
expected = None
Expand All @@ -138,7 +138,7 @@ def test_objects_write(tmpdir, attr):
nested_object = MockHDF(np.array([4.0e14, 2, 2e14, 27.5]))
attr_quantity = u.Quantity(attr, "g/cm**3")
actual = MockClass(attr_quantity, nested_object)
actual.to_hdf(fname, path="test")
actual.to_hdf(fname, path="test", overwrite=True)
expected_property = pd.read_hdf(fname, key="/test/mock_class/property")
assert_array_almost_equal(actual.property.cgs.value, expected_property)
nested_property = pd.read_hdf(
Expand Down
2 changes: 1 addition & 1 deletion tardis/io/tests/test_config_reader.py
Original file line number Diff line number Diff line change
def test_config_hdf(hdf_file_path, tardis_config_verysimple):
    """Write a validated Configuration to HDF and read the config back."""
    expected = Configuration.from_config_dict(
        tardis_config_verysimple, validate=True, config_dirname="test"
    )
    expected.to_hdf(hdf_file_path, overwrite=True)
    actual = pd.read_hdf(hdf_file_path, key="/simulation/config")
    # Rebind `expected` to the serialized config for an apples-to-apples
    # comparison with what was read back from disk.
    expected = expected.get_properties()["config"]
    assert actual[0] == expected[0]
115 changes: 60 additions & 55 deletions tardis/io/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ def get_internal_data_path(fname):
def quantity_from_str(text):
"""
Convert a string to `astropy.units.Quantity`
Parameters
----------
text :
The string to convert to `astropy.units.Quantity`
Returns
-------
`astropy.units.Quantity`
Expand Down Expand Up @@ -76,7 +76,7 @@ def match(self, text):
----------
text :
A string to be passed to `target_type` for conversion.
Returns
-------
`True` if `text` can be converted to `target_type`.
Expand Down Expand Up @@ -196,10 +196,10 @@ def __new__(cls, *args, **kwargs):
return instance

@staticmethod
def to_hdf_util(path_or_buf, path, elements, complevel=9, complib="blosc"):
"""
A function to uniformly store TARDIS data
to an HDF file.
def to_hdf_util(
path_or_buf, path, elements, overwrite, complevel=9, complib="blosc"
):
"""A function to uniformly store TARDIS data to an HDF file.
Scalars will be stored in a Series under path/scalars
1D arrays will be stored under path/property_name as distinct Series
Expand All @@ -209,32 +209,40 @@ def to_hdf_util(path_or_buf, path, elements, complevel=9, complib="blosc"):
Parameters
----------
path_or_buf :
Path or buffer to the HDF store
path_or_buf : str or pandas.io.pytables.HDFStore
Path or buffer to the HDF file
path : str
Path inside the HDF store to store the `elements`
Path inside the HDF file to store the `elements`
elements : dict
A dict of property names and their values to be
stored.
Returns
-------
overwrite: bool
If the HDF file path already exists, whether to overwrite it or not
Notes
-----
`overwrite` option doesn't have any effect when `path_or_buf` is an
HDFStore because the user decides on the mode in which they have
opened the HDFStore ('r', 'w' or 'a').
"""
we_opened = False

try:
try: # when path_or_buf is a str, the HDFStore should get created
buf = pd.HDFStore(path_or_buf, complevel=complevel, complib=complib)
except TypeError as e: # Already a HDFStore
except TypeError as e:
if e.message == "Expected bytes, got HDFStore":
# when path_or_buf is an HDFStore buffer instead
buf = path_or_buf
else:
raise e
else: # path_or_buf was a string and we opened the HDFStore
we_opened = True
else: # path_or_buf was a str
if os.path.exists(path_or_buf) and not overwrite:
buf.close()
raise FileExistsError(
"The specified HDF file already exists. If you still want "
"to overwrite it, set option overwrite=True"
)

if not buf.is_open:
buf.open()
we_opened = True

scalars = {}
for key, value in elements.items():
Expand All @@ -253,25 +261,17 @@ def to_hdf_util(path_or_buf, path, elements, complevel=9, complib="blosc"):
pd.DataFrame(value).to_hdf(buf, os.path.join(path, key))
else:
pd.DataFrame(value).to_hdf(buf, os.path.join(path, key))
else:
else: # value is a TARDIS object like model, runner or plasma
try:
value.to_hdf(buf, path, name=key)
value.to_hdf(buf, path, name=key, overwrite=overwrite)
except AttributeError:
data = pd.DataFrame([value])
data.to_hdf(buf, os.path.join(path, key))

if scalars:
scalars_series = pd.Series(scalars)
pd.Series(scalars).to_hdf(buf, os.path.join(path, "scalars"))

# Unfortunately, with to_hdf we cannot append, so merge beforehand
scalars_path = os.path.join(path, "scalars")
try:
scalars_series = buf[scalars_path].append(scalars_series)
except KeyError: # no scalars in HDFStore
pass
scalars_series.to_hdf(buf, os.path.join(path, "scalars"))

if we_opened:
if buf.is_open:
buf.close()

def get_properties(self):
def convert_to_snake_case(s):
    """Convert a CamelCase identifier ``s`` to snake_case."""
    # Pass 1: insert an underscore before an uppercase letter that starts
    # a capitalized word (e.g. "HDFWriter" -> "HDF_Writer").
    partially_split = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", s)
    # Pass 2: insert an underscore between a lowercase letter or digit and
    # the following uppercase letter, then lowercase everything.
    fully_split = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", partially_split)
    return fully_split.lower()

def to_hdf(self, file_path, path="", name=None):
def to_hdf(self, file_path_or_buf, path="", name=None, overwrite=False):
"""
Parameters
----------
file_path : str
Path or buffer to the HDF store
file_path_or_buf : str or pandas.io.pytables.HDFStore
Path or buffer to the HDF file
path : str
Path inside the HDF store to store the `elements`
Path inside the HDF file to store the `elements`
name : str
Group inside the HDF store to which the `elements` need to be saved
Returns
-------
Group inside the HDF file to which the `elements` need to be saved
overwrite: bool
If the HDF file path already exists, whether to overwrite it or not
"""
if name is None:
try:
Expand All @@ -313,7 +312,7 @@ def to_hdf(self, file_path, path="", name=None):

data = self.get_properties()
buff_path = os.path.join(path, name)
self.to_hdf_util(file_path, buff_path, data)
self.to_hdf_util(file_path_or_buf, buff_path, data, overwrite)


class PlasmaWriterMixin(HDFWriterMixin):
Expand All @@ -338,30 +337,36 @@ def get_properties(self):
data.pop("nlte_data")
return data

def to_hdf(
    self,
    file_path_or_buf,
    path="",
    name=None,
    collection=None,
    overwrite=False,
):
    """Store the plasma's properties in an HDF file.

    Parameters
    ----------
    file_path_or_buf : str or pandas.io.pytables.HDFStore
        Path or buffer to the HDF file
    path : str
        Path inside the HDF file to store the `elements`
    name : str
        Group inside the HDF file to which the `elements` need to be saved
    collection :
        `None` or a `PlasmaPropertyCollection` of which members are
        the property types which will be stored. If `None` then
        all types of properties will be stored. This acts like a filter,
        for example if a value of `property_collections.basic_inputs` is
        given, only those input parameters will be stored to the HDF file.
    overwrite : bool
        If the HDF file path already exists, whether to overwrite it or not
    """
    # Remember the requested property filter before delegating to the
    # generic writer (presumably consumed by get_properties() — confirm).
    self.collection = collection
    super(PlasmaWriterMixin, self).to_hdf(
        file_path_or_buf, path, name, overwrite
    )


def download_from_url(url, dst):
Expand Down
2 changes: 1 addition & 1 deletion tardis/model/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def test_model_decay(simple_isotope_abundance):

@pytest.fixture(scope="module", autouse=True)
def to_hdf_buffer(hdf_file_path, simulation_verysimple):
    """Write the model to the shared HDF file once for the whole module."""
    simulation_verysimple.model.to_hdf(hdf_file_path, overwrite=True)

model_scalar_attrs = ['t_inner']

Expand Down
2 changes: 1 addition & 1 deletion tardis/model/tests/test_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

@pytest.fixture(scope="module", autouse=True)
def to_hdf_buffer(hdf_file_path, simulation_verysimple):
    """Write the homologous density to the shared HDF file for the module."""
    simulation_verysimple.model.homologous_density.to_hdf(
        hdf_file_path, overwrite=True
    )

def test_hdf_density_0(hdf_file_path, simulation_verysimple):
actual = simulation_verysimple.model.homologous_density.density_0
Expand Down
4 changes: 3 additions & 1 deletion tardis/montecarlo/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@

@pytest.fixture(scope="module", autouse=True)
def to_hdf_buffer(hdf_file_path, simulation_verysimple):
    """Write the runner to the shared HDF file once for the whole module."""
    simulation_verysimple.runner.to_hdf(
        hdf_file_path, name="runner", overwrite=True
    )


runner_properties = [
Expand Down
2 changes: 1 addition & 1 deletion tardis/montecarlo/tests/test_spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def compare_spectra(actual, desired):

@pytest.fixture(autouse=True)
def to_hdf_buffer(hdf_file_path, spectrum):
    """Write the spectrum to the HDF file before each test."""
    spectrum.to_hdf(hdf_file_path, name="spectrum", overwrite=True)


@pytest.mark.parametrize("attr", TARDISSpectrum.hdf_properties)
Expand Down
3 changes: 2 additions & 1 deletion tardis/plasma/tests/test_hdf_plasma.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

@pytest.fixture(scope="module", autouse=True)
def to_hdf_buffer(hdf_file_path, simulation_verysimple):
    """Write the plasma to the shared HDF file once for the whole module."""
    simulation_verysimple.plasma.to_hdf(hdf_file_path, overwrite=True)


plasma_properties_list = [
Expand Down Expand Up @@ -104,6 +104,7 @@ def to_hdf_collection_buffer(hdf_file_path, simulation_verysimple):
hdf_file_path,
name="collection",
collection=property_collections.basic_inputs,
overwrite=True,
)


Expand Down
2 changes: 1 addition & 1 deletion tardis/widgets/tests/test_shell_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def simulation_shell_info(simulation_verysimple):

@pytest.fixture(scope="class")
def hdf_shell_info(hdf_file_path, simulation_verysimple):
    """Save the simulation to HDF and return a shell-info view over it."""
    simulation_verysimple.to_hdf(hdf_file_path, overwrite=True)
    return HDFShellInfo(hdf_file_path)


Expand Down

0 comments on commit 1249c4e

Please sign in to comment.