CLN: Lazy import pyarrow
JB Lovland committed Jan 15, 2024
1 parent f9ba374 commit 3752760
Showing 4 changed files with 61 additions and 65 deletions.
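The commit replaces the module-level `try: import pyarrow` guard and the `HAS_PYARROW` flag with imports performed inside the code paths that actually handle Arrow tables, so pyarrow is only loaded when such an object is exported or created. A minimal sketch of the before/after pattern; the `handle_before`/`handle_after` names are hypothetical and only for illustration:

    # Before: pyarrow is imported (or probed) at module load time.
    try:
        import pyarrow as pa
    except ImportError:
        HAS_PYARROW = False
    else:
        HAS_PYARROW = True


    def handle_before(obj):
        if HAS_PYARROW and isinstance(obj, pa.Table):
            ...  # Arrow-specific handling


    # After: the import is deferred to the branch that needs it.
    def handle_after(obj):
        from pyarrow import Table  # ImportError surfaces only if this path runs

        if isinstance(obj, Table):
            ...  # Arrow-specific handling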
45 changes: 20 additions & 25 deletions src/fmu/dataio/_objectdata_provider.py
@@ -99,13 +99,6 @@
from ._definitions import ALLOWED_CONTENTS, STANDARD_TABLE_INDEX_COLUMNS, _ValidFormats
from ._utils import generate_description, parse_timedata

try:
    import pyarrow as pa
except ImportError:
    HAS_PYARROW = False
else:
    HAS_PYARROW = True

logger: Final = logging.getLogger(__name__)


@@ -304,21 +297,6 @@ def _derive_objectdata(self) -> dict:
            )
            result["spec"], result["bbox"] = self._derive_spec_bbox_dataframe()

        elif HAS_PYARROW and isinstance(self.obj, pa.Table):
            result["table_index"] = self._derive_index()

            result["subtype"] = "ArrowTable"
            result["classname"] = "table"
            result["layout"] = "table"
            result["efolder"] = "tables"
            result["fmt"] = self.dataio.arrow_fformat
            result["extension"] = self._validate_get_ext(
                result["fmt"],
                result["subtype"],
                _ValidFormats().table,
            )
            result["spec"], result["bbox"] = self._derive_spec_bbox_arrowtable()

        elif isinstance(self.obj, dict):
            result["subtype"] = "JSON"
            result["classname"] = "dictionary"
@@ -333,9 +311,26 @@ def _derive_objectdata(self) -> dict:
result["spec"], result["bbox"] = self._derive_spec_bbox_dict()

else:
raise NotImplementedError(
"This data type is not (yet) supported: ", type(self.obj)
)
from pyarrow import Table

if isinstance(self.obj, Table):
result["table_index"] = self._derive_index()

result["subtype"] = "ArrowTable"
result["classname"] = "table"
result["layout"] = "table"
result["efolder"] = "tables"
result["fmt"] = self.dataio.arrow_fformat
result["extension"] = self._validate_get_ext(
result["fmt"],
result["subtype"],
_ValidFormats().table,
)
result["spec"], result["bbox"] = self._derive_spec_bbox_arrowtable()
else:
raise NotImplementedError(
"This data type is not (yet) supported: ", type(self.obj)
)

        # override efolder with forcefolder as exception!
        if self.dataio.forcefolder and not self.dataio.forcefolder.startswith("/"):
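A note on the in-branch import in `_derive_objectdata`: after the first successful import, Python resolves a function-level `import` through the `sys.modules` cache, so repeated calls only pay a dictionary lookup rather than re-running pyarrow's initialisation. A small sketch of that behaviour (assumes pyarrow is installed; the function name is made up for the example):

    import sys


    def touches_pyarrow() -> bool:
        # The first call triggers the real import; later calls hit sys.modules.
        from pyarrow import Table  # noqa: F401
        return "pyarrow" in sys.modules


    touches_pyarrow()          # real import happens here
    assert touches_pyarrow()   # cached lookup from now on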
33 changes: 14 additions & 19 deletions src/fmu/dataio/_utils.py
@@ -15,20 +15,11 @@
from typing import Any, Final, Literal

import pandas as pd
import xtgeo
import yaml

from fmu.config import utilities as ut

try:
    import pyarrow as pa
except ImportError:
    HAS_PYARROW = False
else:
    HAS_PYARROW = True
    from pyarrow import feather

import xtgeo

from . import _design_kw, _oyaml as oyaml

logger: Final = logging.getLogger(__name__)
@@ -149,15 +140,19 @@ def export_file(
elif filename.suffix == ".csv" and isinstance(obj, pd.DataFrame):
includeindex = flag == "include_index"
obj.to_csv(filename, index=includeindex)
elif filename.suffix == ".arrow" and HAS_PYARROW and isinstance(obj, pa.Table):
# comment taken from equinor/webviz_subsurface/smry2arrow.py

# Writing here is done through the feather import, but could also be done using
# pa.RecordBatchFileWriter.write_table() with a few pa.ipc.IpcWriteOptions(). It
# is convenient to use feather since it has ready configured defaults and the
# actual file format is the same
# (https://arrow.apache.org/docs/python/feather.html)
feather.write_feather(obj, dest=filename)
elif filename.suffix == ".arrow":
from pyarrow import Table

if isinstance(obj, Table):
from pyarrow import feather
# comment taken from equinor/webviz_subsurface/smry2arrow.py

# Writing here is done through the feather import, but could also be
# done using pa.RecordBatchFileWriter.write_table() with a few
# pa.ipc.IpcWriteOptions(). It is convenient to use feather since it
# has ready configured defaults and the actual file format is the same
# (https://arrow.apache.org/docs/python/feather.html)
feather.write_feather(obj, dest=filename)
elif filename.suffix == ".json":
with open(filename, "w") as stream:
json.dump(obj, stream)
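The comment kept in `export_file` points out that the same file could be written without the feather convenience wrapper, via an Arrow IPC writer with explicit `IpcWriteOptions`. A sketch of that alternative, assuming default write options are acceptable (Feather V2 is the Arrow IPC file format, so the on-disk result is the same kind of file):

    import pyarrow as pa


    def write_arrow_ipc(table: pa.Table, path: str) -> None:
        # Same family of output as feather.write_feather, but with the
        # IPC machinery (and its options) exposed directly.
        options = pa.ipc.IpcWriteOptions()  # tune e.g. compression here
        with pa.ipc.new_file(path, table.schema, options=options) as writer:
            writer.write_table(table)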
43 changes: 24 additions & 19 deletions tests/conftest.py
@@ -9,22 +9,14 @@
from functools import wraps
from pathlib import Path

import fmu.dataio as dio
import pandas as pd
import pytest
import xtgeo
import yaml
from fmu.config import utilities as ut
from termcolor import cprint

try:
    import pyarrow as pa
except ImportError:
    HAS_PYARROW = False
else:
    HAS_PYARROW = True

import fmu.dataio as dio
from fmu.dataio.dataio import ExportData, read_metadata
from termcolor import cprint

logger = logging.getLogger(__name__)

@@ -478,12 +470,19 @@ def fixture_wellpicks():
@pytest.fixture(name="arrowtable", scope="module", autouse=True)
def fixture_arrowtable():
"""Create an arrow table instance."""
table = None
if HAS_PYARROW:
logger.info("Ran %s", inspect.currentframe().f_code.co_name)
dfr = pd.DataFrame({"COL1": [1, 2, 3, 4], "COL2": [99.0, 98.0, 97.0, 96.0]})
table = pa.Table.from_pandas(dfr)
return table
try:
from pyarrow import Table

return Table.from_pandas(
pd.DataFrame(
{
"COL1": [1, 2, 3, 4],
"COL2": [99.0, 98.0, 97.0, 96.0],
}
)
)
except ImportError:
return None


@pytest.fixture(name="aggr_surfs_mean", scope="module", autouse=True)
@@ -554,8 +553,9 @@ def fixture_drogon_sum():
    Returns:
        pa.Table: table with summary data
    """
    from pyarrow import feather
    path = ROOTPWD / "tests/data/drogon/tabular/summary.arrow"
    return pa.feather.read_table(path)
    return feather.read_table(path)


@pytest.fixture(name="mock_volumes")
@@ -582,8 +582,13 @@ def fixture_drogon_volumes():
    Returns:
        pa.Table: table with summary data
    """
    path = ROOTPWD / "tests/data/drogon/tabular/geogrid--vol.csv"
    return pa.Table.from_pandas(pd.read_csv(path))
    from pyarrow import Table

    return Table.from_pandas(
        pd.read_csv(
            ROOTPWD / "tests/data/drogon/tabular/geogrid--vol.csv",
        )
    )


# ======================================================================================
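The updated fixtures fall back to returning None when pyarrow cannot be imported. An alternative pytest idiom, shown here only as a sketch of another option (the fixture name is hypothetical, not part of the commit), is pytest.importorskip, which skips the dependent tests outright instead of handing them a None value:

    import pandas as pd
    import pytest


    @pytest.fixture(name="arrowtable_or_skip", scope="module")
    def fixture_arrowtable_or_skip():
        """Arrow table fixture that skips its users when pyarrow is missing."""
        pa = pytest.importorskip("pyarrow")  # skip instead of returning None
        dfr = pd.DataFrame({"COL1": [1, 2, 3, 4], "COL2": [99.0, 98.0, 97.0, 96.0]})
        return pa.Table.from_pandas(dfr)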
5 changes: 3 additions & 2 deletions tests/test_units/test_table.py
@@ -2,7 +2,6 @@
"""
from pathlib import Path

import pyarrow as pa
import pytest
from fmu.config.utilities import yaml_load
from fmu.dataio import ExportData
@@ -83,9 +82,11 @@ def test_derive_summary_index_pyarrow(mock_summary, globalconfig2):
        mock_summary (pd.DataFrame): summary "like" dataframe
        globalconfig2 (dict): global variables dict
    """
    from pyarrow import Table

    answer = ["DATE"]
    exd = ExportData(config=globalconfig2, content="timeseries")
    path = exd.export(pa.Table.from_pandas(mock_summary), name="baretull")
    path = exd.export(Table.from_pandas(mock_summary), name="baretull")
    assert_correct_table_index(path, answer)


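For reference, a file exported this way can be read back with the same feather module used elsewhere in the test suite. A minimal sketch, where the path argument is assumed to be the value returned by ExportData.export() as in the test above:

    from pyarrow import feather


    def read_back(path: str):
        # Reads the Arrow IPC / Feather file written by export().
        table = feather.read_table(path)
        return table.column_names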
