From 3752760ea836d55fd8e977a84c22e6b903cad7d2 Mon Sep 17 00:00:00 2001
From: JB Lovland
Date: Mon, 15 Jan 2024 15:07:27 +0100
Subject: [PATCH] CLN: Lazy import pyarrow

---
 src/fmu/dataio/_objectdata_provider.py | 45 ++++++++++++--------------
 src/fmu/dataio/_utils.py               | 33 ++++++++-----------
 tests/conftest.py                      | 43 +++++++++++++-----------
 tests/test_units/test_table.py         |  5 +--
 4 files changed, 61 insertions(+), 65 deletions(-)

diff --git a/src/fmu/dataio/_objectdata_provider.py b/src/fmu/dataio/_objectdata_provider.py
index 3f779358b..0a6ee8232 100644
--- a/src/fmu/dataio/_objectdata_provider.py
+++ b/src/fmu/dataio/_objectdata_provider.py
@@ -99,13 +99,6 @@
 from ._definitions import ALLOWED_CONTENTS, STANDARD_TABLE_INDEX_COLUMNS, _ValidFormats
 from ._utils import generate_description, parse_timedata
 
-try:
-    import pyarrow as pa
-except ImportError:
-    HAS_PYARROW = False
-else:
-    HAS_PYARROW = True
-
 logger: Final = logging.getLogger(__name__)
 
 
@@ -304,21 +297,6 @@ def _derive_objectdata(self) -> dict:
             )
             result["spec"], result["bbox"] = self._derive_spec_bbox_dataframe()
 
-        elif HAS_PYARROW and isinstance(self.obj, pa.Table):
-            result["table_index"] = self._derive_index()
-
-            result["subtype"] = "ArrowTable"
-            result["classname"] = "table"
-            result["layout"] = "table"
-            result["efolder"] = "tables"
-            result["fmt"] = self.dataio.arrow_fformat
-            result["extension"] = self._validate_get_ext(
-                result["fmt"],
-                result["subtype"],
-                _ValidFormats().table,
-            )
-            result["spec"], result["bbox"] = self._derive_spec_bbox_arrowtable()
-
         elif isinstance(self.obj, dict):
             result["subtype"] = "JSON"
             result["classname"] = "dictionary"
@@ -333,9 +311,26 @@
             result["spec"], result["bbox"] = self._derive_spec_bbox_dict()
 
         else:
-            raise NotImplementedError(
-                "This data type is not (yet) supported: ", type(self.obj)
-            )
+            from pyarrow import Table
+
+            if isinstance(self.obj, Table):
+                result["table_index"] = self._derive_index()
+
+                result["subtype"] = "ArrowTable"
+                result["classname"] = "table"
+                result["layout"] = "table"
+                result["efolder"] = "tables"
+                result["fmt"] = self.dataio.arrow_fformat
+                result["extension"] = self._validate_get_ext(
+                    result["fmt"],
+                    result["subtype"],
+                    _ValidFormats().table,
+                )
+                result["spec"], result["bbox"] = self._derive_spec_bbox_arrowtable()
+            else:
+                raise NotImplementedError(
+                    "This data type is not (yet) supported: ", type(self.obj)
+                )
 
         # override efolder with forcefolder as exception!
         if self.dataio.forcefolder and not self.dataio.forcefolder.startswith("/"):
diff --git a/src/fmu/dataio/_utils.py b/src/fmu/dataio/_utils.py
index a735ccb7f..3cab7f19c 100644
--- a/src/fmu/dataio/_utils.py
+++ b/src/fmu/dataio/_utils.py
@@ -15,20 +15,11 @@
 from typing import Any, Final, Literal
 
 import pandas as pd
+import xtgeo
 import yaml
 from fmu.config import utilities as ut
 
-try:
-    import pyarrow as pa
-except ImportError:
-    HAS_PYARROW = False
-else:
-    HAS_PYARROW = True
-    from pyarrow import feather
-
-import xtgeo
-
 from . import _design_kw, _oyaml as oyaml
 
 logger: Final = logging.getLogger(__name__)
 
@@ -149,15 +140,19 @@ def export_file(
     elif filename.suffix == ".csv" and isinstance(obj, pd.DataFrame):
         includeindex = flag == "include_index"
         obj.to_csv(filename, index=includeindex)
-    elif filename.suffix == ".arrow" and HAS_PYARROW and isinstance(obj, pa.Table):
-        # comment taken from equinor/webviz_subsurface/smry2arrow.py
-
-        # Writing here is done through the feather import, but could also be done using
-        # pa.RecordBatchFileWriter.write_table() with a few pa.ipc.IpcWriteOptions(). It
-        # is convenient to use feather since it has ready configured defaults and the
-        # actual file format is the same
-        # (https://arrow.apache.org/docs/python/feather.html)
-        feather.write_feather(obj, dest=filename)
+    elif filename.suffix == ".arrow":
+        from pyarrow import Table
+
+        if isinstance(obj, Table):
+            from pyarrow import feather
+            # comment taken from equinor/webviz_subsurface/smry2arrow.py
+
+            # Writing here is done through the feather import, but could also be
+            # done using pa.RecordBatchFileWriter.write_table() with a few
+            # pa.ipc.IpcWriteOptions(). It is convenient to use feather since it
+            # has ready configured defaults and the actual file format is the same
+            # (https://arrow.apache.org/docs/python/feather.html)
+            feather.write_feather(obj, dest=filename)
     elif filename.suffix == ".json":
         with open(filename, "w") as stream:
             json.dump(obj, stream)
diff --git a/tests/conftest.py b/tests/conftest.py
index 1f2c9f40f..e19b07001 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,22 +9,14 @@
 from functools import wraps
 from pathlib import Path
 
+import fmu.dataio as dio
 import pandas as pd
 import pytest
 import xtgeo
 import yaml
 from fmu.config import utilities as ut
-from termcolor import cprint
-
-try:
-    import pyarrow as pa
-except ImportError:
-    HAS_PYARROW = False
-else:
-    HAS_PYARROW = True
-
-import fmu.dataio as dio
 from fmu.dataio.dataio import ExportData, read_metadata
+from termcolor import cprint
 
 logger = logging.getLogger(__name__)
 
@@ -478,12 +470,19 @@ def fixture_wellpicks():
 @pytest.fixture(name="arrowtable", scope="module", autouse=True)
 def fixture_arrowtable():
     """Create an arrow table instance."""
-    table = None
-    if HAS_PYARROW:
-        logger.info("Ran %s", inspect.currentframe().f_code.co_name)
-        dfr = pd.DataFrame({"COL1": [1, 2, 3, 4], "COL2": [99.0, 98.0, 97.0, 96.0]})
-        table = pa.Table.from_pandas(dfr)
-    return table
+    try:
+        from pyarrow import Table
+
+        return Table.from_pandas(
+            pd.DataFrame(
+                {
+                    "COL1": [1, 2, 3, 4],
+                    "COL2": [99.0, 98.0, 97.0, 96.0],
+                }
+            )
+        )
+    except ImportError:
+        return None
 
 
 @pytest.fixture(name="aggr_surfs_mean", scope="module", autouse=True)
@@ -554,8 +553,9 @@ def fixture_drogon_sum():
     Returns:
         pa.Table: table with summary data
     """
+    from pyarrow import feather
     path = ROOTPWD / "tests/data/drogon/tabular/summary.arrow"
-    return pa.feather.read_table(path)
+    return feather.read_table(path)
 
 
 @pytest.fixture(name="mock_volumes")
@@ -582,8 +582,13 @@ def fixture_drogon_volumes():
     Returns:
         pa.Table: table with summary data
     """
-    path = ROOTPWD / "tests/data/drogon/tabular/geogrid--vol.csv"
-    return pa.Table.from_pandas(pd.read_csv(path))
+    from pyarrow import Table
+
+    return Table.from_pandas(
+        pd.read_csv(
+            ROOTPWD / "tests/data/drogon/tabular/geogrid--vol.csv",
+        )
+    )
 
 
 # ======================================================================================
diff --git a/tests/test_units/test_table.py b/tests/test_units/test_table.py
index 5d4f05bdb..39e41d286 100644
--- a/tests/test_units/test_table.py
+++ b/tests/test_units/test_table.py
@@ -2,7 +2,6 @@
 """
 from pathlib import Path
 
-import pyarrow as pa
 import pytest
 from fmu.config.utilities import yaml_load
 from fmu.dataio import ExportData
@@ -83,9 +82,11 @@ def test_derive_summary_index_pyarrow(mock_summary, globalconfig2):
         mock_summary (pd.DataFrame): summary "like" dataframe
         globalconfig2 (dict): global variables dict
     """
+    from pyarrow import Table
+
    answer = ["DATE"]
     exd = ExportData(config=globalconfig2, content="timeseries")
-    path = exd.export(pa.Table.from_pandas(mock_summary), name="baretull")
+    path = exd.export(Table.from_pandas(mock_summary), name="baretull")
     assert_correct_table_index(path, answer)
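
Note (not part of the patch): every hunk above applies the same deferred-import idea: pyarrow is imported inside the code paths that actually touch Arrow tables, so fmu.dataio can be imported and used without pyarrow installed. Below is a minimal sketch of that pattern for illustration only; the function name and behaviour are hypothetical, not fmu.dataio API.

    from typing import Any


    def classify(obj: Any) -> str:
        """Illustrative only: label an object, importing pyarrow lazily."""
        try:
            # Deferred import: the dependency is only required when this
            # code path actually runs.
            from pyarrow import Table
        except ImportError:
            Table = None  # pyarrow absent, so Arrow tables cannot appear here

        if Table is not None and isinstance(obj, Table):
            return "ArrowTable"
        raise NotImplementedError("This data type is not (yet) supported: ", type(obj))

The arrowtable fixture in tests/conftest.py uses the try/except ImportError form shown here, while the production code imports pyarrow directly inside the branch that handles Arrow tables.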