From 297326eb1053c4ccc74bbf9e94e3cff04dba3348 Mon Sep 17 00:00:00 2001
From: emanuel-schmid <schmide@ethz.ch>
Date: Wed, 16 Apr 2025 10:07:47 +0200
Subject: [PATCH 1/8] refactor Exposures.write_hdf5 and .from_hdf5: use wkb
 instead of pickle for geometry serialization

---
 climada/entity/exposures/base.py | 44 +++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
index f437d2d46..22889d5aa 100644
--- a/climada/entity/exposures/base.py
+++ b/climada/entity/exposures/base.py
@@ -37,6 +37,7 @@
 from geopandas import GeoDataFrame, GeoSeries, points_from_xy
 from mpl_toolkits.axes_grid1 import make_axes_locatable
 from rasterio.warp import Resampling
+from xarray import DataArray
 
 import climada.util.coordinates as u_coord
 import climada.util.hdf5_handler as u_hdf5
@@ -1121,20 +1122,30 @@ def plot_basemap(
         self.to_crs(crs_ori, inplace=True)
         return axis
 
-    def write_hdf5(self, file_name):
+    def write_hdf5(self, file_name, pickle_geometry=False):
         """Write data frame and metadata in hdf5 format
 
         Parameters
         ----------
         file_name : str
             (path and) file name to write to.
+        pickle_geometry : bool
+            flag, indicating whether the "geometry" of the Exposures` `data` will be stored as
+            pickled shapely objects instead of wkb bytes. This is faster but less durable, because
+            pickled data may get unreadable for future shapely versions.
+            Default: False
         """
         LOGGER.info("Writing %s", file_name)
         store = pd.HDFStore(file_name, mode="w")
-        pandas_df = pd.DataFrame(self.gdf)
+        pandas_df = pd.DataFrame(self.data)
+        wkb_data = {}
         for col in pandas_df.columns:
             if str(pandas_df[col].dtype) == "geometry":
-                pandas_df[col] = np.asarray(self.gdf[col])
+                if pickle_geometry:
+                    pandas_df[col] = np.asarray(self.data[col])
+                else:
+                    wkb_data[col] = to_wkb_store(self.geometry)
+                    pandas_df.drop(columns=["geometry"])
 
         # Avoid pandas PerformanceWarning when writing HDF5 data
         with warnings.catch_warnings():
@@ -1142,6 +1153,9 @@ def write_hdf5(self, file_name):
             # Write dataframe
             store.put("exposures", pandas_df)
 
+        if wkb_data:
+            store.put("wkb_data", wkb_data)
+
         var_meta = {}
         for var in type(self)._metadata:
             var_meta[var] = getattr(self, var)
@@ -1184,7 +1198,14 @@ def from_hdf5(cls, file_name):
             crs = metadata.get("crs", metadata.get("_crs"))
             if crs is None and metadata.get("meta"):
                 crs = metadata["meta"].get("crs")
-            exp = cls(store["exposures"], crs=crs)
+            data = pd.DataFrame(store["exposures"])
+            try:
+                wkb_data = store.get("wkb_data")
+            except KeyError:
+                wkb_data = {}
+            for col, val in wkb_data.items():
+                data[col] = from_wkb_store(val)
+            exp = cls(data, crs=crs)
             for key, val in metadata.items():
                 if key in type(exp)._metadata:  # pylint: disable=protected-access
                     setattr(exp, key, val)
@@ -1553,6 +1574,21 @@ def _read_mat_optional(exposures, data, var_names):
         pass
 
 
+def to_wkb_store(geometry: np.array, store):
+    wkb_data = geometry.to_wkb().to_numpy()
+    import h5py
+
+    wkb_dataset = h5py.Dataset(store)
+
+    # Store WKB as variable-length byte arrays
+    dt = h5py.vlen_dtype(np.dtype("uint8"))
+    wkb_dataset.dtype = dt
+    for i, geom_bytes in enumerate(wkb_data):
+        wkb_dataset[i] = np.frombuffer(geom_bytes, dtype="uint8")
+
+    return wkb_data
+
+
 def _read_mat_metadata(exposures, data, file_name, var_names):
     """Fill metadata in DataFrame object"""
     try:

From 5e8e583c4ea69b9c121dfe8afe3cf5dc10d7f736 Mon Sep 17 00:00:00 2001
From: emanuel-schmid <schmide@ethz.ch>
Date: Tue, 22 Apr 2025 16:53:25 +0200
Subject: [PATCH 2/8] refactor Exposures.write_hdf5

---
 climada/entity/exposures/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
index 22889d5aa..69512c49d 100644
--- a/climada/entity/exposures/base.py
+++ b/climada/entity/exposures/base.py
@@ -1145,7 +1145,7 @@ def write_hdf5(self, file_name, pickle_geometry=False):
                     pandas_df[col] = np.asarray(self.data[col])
                 else:
                     wkb_data[col] = to_wkb_store(self.geometry)
-                    pandas_df.drop(columns=["geometry"])
+                    pandas_df.drop(columns=[col], inplace=True)
 
         # Avoid pandas PerformanceWarning when writing HDF5 data
         with warnings.catch_warnings():

From ab18675045105ace837cd850e69ef4a72bcc0a51 Mon Sep 17 00:00:00 2001
From: emanuel-schmid <schmide@ethz.ch>
Date: Wed, 30 Apr 2025 16:05:35 +0200
Subject: [PATCH 3/8] change of plan: just pickle geometries in wkb format

---
 CHANGELOG.md                               |  5 ++
 climada/entity/exposures/base.py           | 55 +++++--------
 climada/entity/exposures/test/test_base.py | 89 +++++++++++-----------
 3 files changed, 71 insertions(+), 78 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 42757783c..74780084e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,18 +15,23 @@ Removed:
 - `pandas-datareader`
 
 ### Added
+
 - Added instructions to install Climada petals on Euler cluster in `doc.guide.Guide_Euler.ipynb` [#1029](https://github.com/CLIMADA-project/climada_python/pull/1029)
 
 ### Changed
+
 - `Hazard.local_exceedance_intensity`, `Hazard.local_return_period` and `Impact.local_exceedance_impact`, `Impact.local_return_period`, using the `climada.util.interpolation` module: New default (no binning), binning on decimals, and faster implementation [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012)
 - World Bank indicator data is now downloaded directly from their API via the function `download_world_bank_indicator`, instead of relying on the `pandas-datareader` package [#1033](https://github.com/CLIMADA-project/climada_python/pull/1033)
+- `Exposures.write_hdf5` pickles geometry data in WKB format by default, and not as `shapely` objects anymore. There is now a flag to keep the previous behavior.
 
 ### Fixed
+
 - NaN plotting issues in `geo_im_from_array`[#1038](https://github.com/CLIMADA-project/climada_python/pull/1038)
 
 ### Deprecated
 
 ### Removed
+
 - `climada.util.interpolation.round_to_sig_digits` [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012)
 
 ## 6.0.1
diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
index 69512c49d..fdcde8e6e 100644
--- a/climada/entity/exposures/base.py
+++ b/climada/entity/exposures/base.py
@@ -29,6 +29,7 @@
 
 import cartopy.crs as ccrs
 import contextily as ctx
+import geopandas as gpd
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -37,7 +38,6 @@
 from geopandas import GeoDataFrame, GeoSeries, points_from_xy
 from mpl_toolkits.axes_grid1 import make_axes_locatable
 from rasterio.warp import Resampling
-from xarray import DataArray
 
 import climada.util.coordinates as u_coord
 import climada.util.hdf5_handler as u_hdf5
@@ -1122,30 +1122,31 @@ def plot_basemap(
         self.to_crs(crs_ori, inplace=True)
         return axis
 
-    def write_hdf5(self, file_name, pickle_geometry=False):
+    def write_hdf5(self, file_name, pickle_geometry_as_shapely=False):
         """Write data frame and metadata in hdf5 format
 
         Parameters
         ----------
         file_name : str
             (path and) file name to write to.
-        pickle_geometry : bool
+        pickle_geometry_as_shapely : bool
             flag, indicating whether the "geometry" of the Exposures` `data` will be stored as
-            pickled shapely objects instead of wkb bytes. This is faster but less durable, because
-            pickled data may get unreadable for future shapely versions.
+            pickled shapely objects instead of wkb bytes. This has been the case for earlier
+            CLIMADA version, up to 6.0, and is perhaps faster but less durable,
+            because pickled data may evantually get unreadable for future shapely versions.
             Default: False
         """
         LOGGER.info("Writing %s", file_name)
         store = pd.HDFStore(file_name, mode="w")
-        pandas_df = pd.DataFrame(self.data)
-        wkb_data = {}
+        pandas_df = pd.DataFrame(self.gdf)
+        wkb_columns = []
         for col in pandas_df.columns:
             if str(pandas_df[col].dtype) == "geometry":
-                if pickle_geometry:
-                    pandas_df[col] = np.asarray(self.data[col])
+                if pickle_geometry_as_shapely:
+                    pandas_df[col] = np.asarray(self.gdf[col])
                 else:
-                    wkb_data[col] = to_wkb_store(self.geometry)
-                    pandas_df.drop(columns=[col], inplace=True)
+                    pandas_df[col] = gpd.GeoSeries.to_wkb(pandas_df[col])
+                    wkb_columns.append(col)
 
         # Avoid pandas PerformanceWarning when writing HDF5 data
         with warnings.catch_warnings():
@@ -1153,13 +1154,11 @@ def write_hdf5(self, file_name, pickle_geometry=False):
             # Write dataframe
             store.put("exposures", pandas_df)
 
-        if wkb_data:
-            store.put("wkb_data", wkb_data)
-
         var_meta = {}
         for var in type(self)._metadata:
             var_meta[var] = getattr(self, var)
         var_meta["crs"] = self.crs
+        var_meta["wkb_columns"] = wkb_columns
         store.get_storer("exposures").attrs.metadata = var_meta
 
         store.close()
@@ -1199,12 +1198,13 @@ def from_hdf5(cls, file_name):
             if crs is None and metadata.get("meta"):
                 crs = metadata["meta"].get("crs")
             data = pd.DataFrame(store["exposures"])
-            try:
-                wkb_data = store.get("wkb_data")
-            except KeyError:
-                wkb_data = {}
-            for col, val in wkb_data.items():
-                data[col] = from_wkb_store(val)
+
+            wkb_columns = (
+                metadata.pop("wkb_columns") if "wkb_columns" in metadata else []
+            )
+            for col in wkb_columns:
+                data[col] = gpd.GeoSeries.from_wkb(data[col])
+
             exp = cls(data, crs=crs)
             for key, val in metadata.items():
                 if key in type(exp)._metadata:  # pylint: disable=protected-access
@@ -1574,21 +1574,6 @@ def _read_mat_optional(exposures, data, var_names):
         pass
 
 
-def to_wkb_store(geometry: np.array, store):
-    wkb_data = geometry.to_wkb().to_numpy()
-    import h5py
-
-    wkb_dataset = h5py.Dataset(store)
-
-    # Store WKB as variable-length byte arrays
-    dt = h5py.vlen_dtype(np.dtype("uint8"))
-    wkb_dataset.dtype = dt
-    for i, geom_bytes in enumerate(wkb_data):
-        wkb_dataset[i] = np.frombuffer(geom_bytes, dtype="uint8")
-
-    return wkb_data
-
-
 def _read_mat_metadata(exposures, data, file_name, var_names):
     """Fill metadata in DataFrame object"""
     try:
diff --git a/climada/entity/exposures/test/test_base.py b/climada/entity/exposures/test/test_base.py
index 66e921cd4..55752785f 100644
--- a/climada/entity/exposures/test/test_base.py
+++ b/climada/entity/exposures/test/test_base.py
@@ -378,11 +378,11 @@ def test_read_template_pass(self):
 
     def test_io_hdf5_pass(self):
         """write and read hdf5"""
-        exp_df = Exposures(pd.read_excel(ENT_TEMPLATE_XLS), crs="epsg:32632")
-        exp_df.check()
+        exp = Exposures(pd.read_excel(ENT_TEMPLATE_XLS), crs="epsg:32632")
+
         # set metadata
-        exp_df.ref_year = 2020
-        exp_df.value_unit = "XSD"
+        exp.ref_year = 2020
+        exp.value_unit = "XSD"
 
         file_name = DATA_DIR.joinpath("test_hdf5_exp.h5")
 
@@ -390,48 +390,51 @@ def test_io_hdf5_pass(self):
         # PerformanceWarning would result in test failure here
         import warnings
 
-        with warnings.catch_warnings():
-            warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
-            exp_df.write_hdf5(file_name)
+        for pickle_geometry_as_shapely in [False, True]:
+            with warnings.catch_warnings():
+                warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
+                exp.write_hdf5(
+                    file_name, pickle_geometry_as_shapely=pickle_geometry_as_shapely
+                )
 
-        exp_read = Exposures.from_hdf5(file_name)
+            exp_read = Exposures.from_hdf5(file_name)
 
-        self.assertEqual(exp_df.ref_year, exp_read.ref_year)
-        self.assertEqual(exp_df.value_unit, exp_read.value_unit)
-        self.assertEqual(exp_df.description, exp_read.description)
-        np.testing.assert_array_equal(exp_df.latitude, exp_read.latitude)
-        np.testing.assert_array_equal(exp_df.longitude, exp_read.longitude)
-        np.testing.assert_array_equal(exp_df.value, exp_read.value)
-        np.testing.assert_array_equal(
-            exp_df.data["deductible"].values, exp_read.data["deductible"].values
-        )
-        np.testing.assert_array_equal(
-            exp_df.data["cover"].values, exp_read.data["cover"].values
-        )
-        np.testing.assert_array_equal(
-            exp_df.data["region_id"].values, exp_read.data["region_id"].values
-        )
-        np.testing.assert_array_equal(
-            exp_df.data["category_id"].values, exp_read.data["category_id"].values
-        )
-        np.testing.assert_array_equal(
-            exp_df.data["impf_TC"].values, exp_read.data["impf_TC"].values
-        )
-        np.testing.assert_array_equal(
-            exp_df.data["centr_TC"].values, exp_read.data["centr_TC"].values
-        )
-        np.testing.assert_array_equal(
-            exp_df.data["impf_FL"].values, exp_read.data["impf_FL"].values
-        )
-        np.testing.assert_array_equal(
-            exp_df.data["centr_FL"].values, exp_read.data["centr_FL"].values
-        )
+            self.assertEqual(exp.ref_year, exp_read.ref_year)
+            self.assertEqual(exp.value_unit, exp_read.value_unit)
+            self.assertEqual(exp.description, exp_read.description)
+            np.testing.assert_array_equal(exp.latitude, exp_read.latitude)
+            np.testing.assert_array_equal(exp.longitude, exp_read.longitude)
+            np.testing.assert_array_equal(exp.value, exp_read.value)
+            np.testing.assert_array_equal(
+                exp.data["deductible"].values, exp_read.data["deductible"].values
+            )
+            np.testing.assert_array_equal(
+                exp.data["cover"].values, exp_read.data["cover"].values
+            )
+            np.testing.assert_array_equal(
+                exp.data["region_id"].values, exp_read.data["region_id"].values
+            )
+            np.testing.assert_array_equal(
+                exp.data["category_id"].values, exp_read.data["category_id"].values
+            )
+            np.testing.assert_array_equal(
+                exp.data["impf_TC"].values, exp_read.data["impf_TC"].values
+            )
+            np.testing.assert_array_equal(
+                exp.data["centr_TC"].values, exp_read.data["centr_TC"].values
+            )
+            np.testing.assert_array_equal(
+                exp.data["impf_FL"].values, exp_read.data["impf_FL"].values
+            )
+            np.testing.assert_array_equal(
+                exp.data["centr_FL"].values, exp_read.data["centr_FL"].values
+            )
 
-        self.assertTrue(
-            u_coord.equal_crs(exp_df.crs, exp_read.crs),
-            f"{exp_df.crs} and {exp_read.crs} are different",
-        )
-        self.assertTrue(u_coord.equal_crs(exp_df.gdf.crs, exp_read.gdf.crs))
+            self.assertTrue(
+                u_coord.equal_crs(exp.crs, exp_read.crs),
+                f"{exp.crs} and {exp_read.crs} are different",
+            )
+            self.assertTrue(u_coord.equal_crs(exp.gdf.crs, exp_read.gdf.crs))
 
 
 class TestAddSea(unittest.TestCase):

From 347f1f9a77f818dc6cf7db9ee1736e90f2b1928f Mon Sep 17 00:00:00 2001
From: Emanuel Schmid <51439563+emanuel-schmid@users.noreply.github.com>
Date: Tue, 6 May 2025 17:50:11 +0200
Subject: [PATCH 4/8] Update climada/entity/exposures/base.py

Co-authored-by: Lukas Riedel <34276446+peanutfun@users.noreply.github.com>
---
 climada/entity/exposures/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
index fdcde8e6e..5c687d80d 100644
--- a/climada/entity/exposures/base.py
+++ b/climada/entity/exposures/base.py
@@ -1145,7 +1145,7 @@ def write_hdf5(self, file_name, pickle_geometry_as_shapely=False):
                 if pickle_geometry_as_shapely:
                     pandas_df[col] = np.asarray(self.gdf[col])
                 else:
-                    pandas_df[col] = gpd.GeoSeries.to_wkb(pandas_df[col])
+                    pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
                     wkb_columns.append(col)
 
         # Avoid pandas PerformanceWarning when writing HDF5 data

From 9312b3044d45ff28aa091cf05985dbf348eefcd4 Mon Sep 17 00:00:00 2001
From: emanuel-schmid <schmide@ethz.ch>
Date: Thu, 8 May 2025 09:46:30 +0200
Subject: [PATCH 5/8] abandon shapely pickling

---
 CHANGELOG.md                               |  2 +-
 climada/entity/exposures/base.py           | 15 +---
 climada/entity/exposures/test/test_base.py | 85 +++++++++++-----------
 3 files changed, 45 insertions(+), 57 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 74780084e..373ee9cfe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,7 +22,7 @@ Removed:
 
 - `Hazard.local_exceedance_intensity`, `Hazard.local_return_period` and `Impact.local_exceedance_impact`, `Impact.local_return_period`, using the `climada.util.interpolation` module: New default (no binning), binning on decimals, and faster implementation [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012)
 - World Bank indicator data is now downloaded directly from their API via the function `download_world_bank_indicator`, instead of relying on the `pandas-datareader` package [#1033](https://github.com/CLIMADA-project/climada_python/pull/1033)
-- `Exposures.write_hdf5` pickles geometry data in WKB format by default, and not as `shapely` objects anymore. There is now a flag to keep the previous behavior.
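+- `Exposures.write_hdf5` now stores geometry columns as WKB bytes instead of pickled `shapely` objects, which is faster and more sustainable; `Exposures.from_hdf5` converts them back to geometries on reading. [#1051](https://github.com/CLIMADA-project/climada_python/pull/1051)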
 
 ### Fixed
 
diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
index 5c687d80d..71545a6ef 100644
--- a/climada/entity/exposures/base.py
+++ b/climada/entity/exposures/base.py
@@ -1122,19 +1122,13 @@ def plot_basemap(
         self.to_crs(crs_ori, inplace=True)
         return axis
 
-    def write_hdf5(self, file_name, pickle_geometry_as_shapely=False):
+    def write_hdf5(self, file_name):
         """Write data frame and metadata in hdf5 format
 
         Parameters
         ----------
         file_name : str
             (path and) file name to write to.
-        pickle_geometry_as_shapely : bool
-            flag, indicating whether the "geometry" of the Exposures` `data` will be stored as
-            pickled shapely objects instead of wkb bytes. This has been the case for earlier
-            CLIMADA version, up to 6.0, and is perhaps faster but less durable,
-            because pickled data may evantually get unreadable for future shapely versions.
-            Default: False
         """
         LOGGER.info("Writing %s", file_name)
         store = pd.HDFStore(file_name, mode="w")
@@ -1142,11 +1136,8 @@ def write_hdf5(self, file_name, pickle_geometry_as_shapely=False):
         wkb_columns = []
         for col in pandas_df.columns:
             if str(pandas_df[col].dtype) == "geometry":
-                if pickle_geometry_as_shapely:
-                    pandas_df[col] = np.asarray(self.gdf[col])
-                else:
-                    pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
-                    wkb_columns.append(col)
+                pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
+                wkb_columns.append(col)
 
         # Avoid pandas PerformanceWarning when writing HDF5 data
         with warnings.catch_warnings():
diff --git a/climada/entity/exposures/test/test_base.py b/climada/entity/exposures/test/test_base.py
index 55752785f..3a4ee3663 100644
--- a/climada/entity/exposures/test/test_base.py
+++ b/climada/entity/exposures/test/test_base.py
@@ -390,51 +390,48 @@ def test_io_hdf5_pass(self):
         # PerformanceWarning would result in test failure here
         import warnings
 
-        for pickle_geometry_as_shapely in [False, True]:
-            with warnings.catch_warnings():
-                warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
-                exp.write_hdf5(
-                    file_name, pickle_geometry_as_shapely=pickle_geometry_as_shapely
-                )
-
-            exp_read = Exposures.from_hdf5(file_name)
-
-            self.assertEqual(exp.ref_year, exp_read.ref_year)
-            self.assertEqual(exp.value_unit, exp_read.value_unit)
-            self.assertEqual(exp.description, exp_read.description)
-            np.testing.assert_array_equal(exp.latitude, exp_read.latitude)
-            np.testing.assert_array_equal(exp.longitude, exp_read.longitude)
-            np.testing.assert_array_equal(exp.value, exp_read.value)
-            np.testing.assert_array_equal(
-                exp.data["deductible"].values, exp_read.data["deductible"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["cover"].values, exp_read.data["cover"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["region_id"].values, exp_read.data["region_id"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["category_id"].values, exp_read.data["category_id"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["impf_TC"].values, exp_read.data["impf_TC"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["centr_TC"].values, exp_read.data["centr_TC"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["impf_FL"].values, exp_read.data["impf_FL"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["centr_FL"].values, exp_read.data["centr_FL"].values
-            )
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
+            exp.write_hdf5(file_name=file_name)
+
+        exp_read = Exposures.from_hdf5(file_name)
+
+        self.assertEqual(exp.ref_year, exp_read.ref_year)
+        self.assertEqual(exp.value_unit, exp_read.value_unit)
+        self.assertEqual(exp.description, exp_read.description)
+        np.testing.assert_array_equal(exp.latitude, exp_read.latitude)
+        np.testing.assert_array_equal(exp.longitude, exp_read.longitude)
+        np.testing.assert_array_equal(exp.value, exp_read.value)
+        np.testing.assert_array_equal(
+            exp.data["deductible"].values, exp_read.data["deductible"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["cover"].values, exp_read.data["cover"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["region_id"].values, exp_read.data["region_id"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["category_id"].values, exp_read.data["category_id"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["impf_TC"].values, exp_read.data["impf_TC"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["centr_TC"].values, exp_read.data["centr_TC"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["impf_FL"].values, exp_read.data["impf_FL"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["centr_FL"].values, exp_read.data["centr_FL"].values
+        )
 
-            self.assertTrue(
-                u_coord.equal_crs(exp.crs, exp_read.crs),
-                f"{exp.crs} and {exp_read.crs} are different",
-            )
-            self.assertTrue(u_coord.equal_crs(exp.gdf.crs, exp_read.gdf.crs))
+        self.assertTrue(
+            u_coord.equal_crs(exp.crs, exp_read.crs),
+            f"{exp.crs} and {exp_read.crs} are different",
+        )
+        self.assertTrue(u_coord.equal_crs(exp.gdf.crs, exp_read.gdf.crs))
 
 
 class TestAddSea(unittest.TestCase):

From 8973490ec399641f58609464d7c37a72f1979801 Mon Sep 17 00:00:00 2001
From: emanuel-schmid <schmide@ethz.ch>
Date: Mon, 12 May 2025 08:38:12 +0200
Subject: [PATCH 6/8] simplify wkb columns collection

---
 climada/entity/exposures/base.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
index 71545a6ef..634c660fc 100644
--- a/climada/entity/exposures/base.py
+++ b/climada/entity/exposures/base.py
@@ -1133,11 +1133,9 @@ def write_hdf5(self, file_name):
         LOGGER.info("Writing %s", file_name)
         store = pd.HDFStore(file_name, mode="w")
         pandas_df = pd.DataFrame(self.gdf)
-        wkb_columns = []
-        for col in pandas_df.columns:
-            if str(pandas_df[col].dtype) == "geometry":
-                pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
-                wkb_columns.append(col)
+        geocols = self.gdf.columns[self.gdf.dtypes == "geometry"].to_list()
+        for col in geocols:
+            pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
 
         # Avoid pandas PerformanceWarning when writing HDF5 data
         with warnings.catch_warnings():
@@ -1149,7 +1147,7 @@ def write_hdf5(self, file_name):
         for var in type(self)._metadata:
             var_meta[var] = getattr(self, var)
         var_meta["crs"] = self.crs
-        var_meta["wkb_columns"] = wkb_columns
+        var_meta["wkb_columns"] = geocols
         store.get_storer("exposures").attrs.metadata = var_meta
 
         store.close()

From 5ff1e19fc0eaa45d5c2df26e788cc0ce64c8ec19 Mon Sep 17 00:00:00 2001
From: emanuel-schmid <schmide@ethz.ch>
Date: Mon, 12 May 2025 08:58:07 +0200
Subject: [PATCH 7/8] simplify wkb conversion

---
 climada/entity/exposures/base.py           |  4 +---
 climada/entity/exposures/test/test_base.py | 10 +++++++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
index 634c660fc..98d6c21cf 100644
--- a/climada/entity/exposures/base.py
+++ b/climada/entity/exposures/base.py
@@ -1132,10 +1132,8 @@ def write_hdf5(self, file_name):
         """
         LOGGER.info("Writing %s", file_name)
         store = pd.HDFStore(file_name, mode="w")
-        pandas_df = pd.DataFrame(self.gdf)
         geocols = self.gdf.columns[self.gdf.dtypes == "geometry"].to_list()
-        for col in geocols:
-            pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
+        pandas_df = self.data.to_wkb()
 
         # Avoid pandas PerformanceWarning when writing HDF5 data
         with warnings.catch_warnings():
diff --git a/climada/entity/exposures/test/test_base.py b/climada/entity/exposures/test/test_base.py
index 3a4ee3663..77e1e50ec 100644
--- a/climada/entity/exposures/test/test_base.py
+++ b/climada/entity/exposures/test/test_base.py
@@ -384,6 +384,9 @@ def test_io_hdf5_pass(self):
         exp.ref_year = 2020
         exp.value_unit = "XSD"
 
+        # add another geometry column
+        exp.data["geocol2"] = exp.data.geometry.copy(deep=True)
+
         file_name = DATA_DIR.joinpath("test_hdf5_exp.h5")
 
         # pd.errors.PerformanceWarning should be suppressed. Therefore, make sure that
@@ -431,7 +434,12 @@ def test_io_hdf5_pass(self):
             u_coord.equal_crs(exp.crs, exp_read.crs),
             f"{exp.crs} and {exp_read.crs} are different",
         )
-        self.assertTrue(u_coord.equal_crs(exp.gdf.crs, exp_read.gdf.crs))
+        self.assertTrue(u_coord.equal_crs(exp.data.crs, exp_read.data.crs))
+
+        self.assertTrue(exp_read.data["geocol2"].dtype == "geometry")
+        np.testing.assert_array_equal(
+            exp.data["geocol2"].geometry, exp_read.data["geocol2"].values
+        )
 
 
 class TestAddSea(unittest.TestCase):

From dea3e9993abd40b8a4a8fa1ba17d03ba8602d746 Mon Sep 17 00:00:00 2001
From: emanuel-schmid <schmide@ethz.ch>
Date: Mon, 12 May 2025 09:02:33 +0200
Subject: [PATCH 8/8] cosmetics

---
 climada/entity/exposures/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
index 98d6c21cf..1021dc7ab 100644
--- a/climada/entity/exposures/base.py
+++ b/climada/entity/exposures/base.py
@@ -1132,7 +1132,7 @@ def write_hdf5(self, file_name):
         """
         LOGGER.info("Writing %s", file_name)
         store = pd.HDFStore(file_name, mode="w")
-        geocols = self.gdf.columns[self.gdf.dtypes == "geometry"].to_list()
+        geocols = self.data.columns[self.data.dtypes == "geometry"].to_list()
         pandas_df = self.data.to_wkb()
 
         # Avoid pandas PerformanceWarning when writing HDF5 data