Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

**Breaking**: data_kind: data is None and required now returns the 'empty' kind #3482

Merged
merged 30 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0c82b3c
data_kind: Refactor the if-else statements into if-return statements
seisman Oct 3, 2024
808755d
data_kind: Now 'matrix' represents a 2-D numpy array and unrecognized …
seisman Oct 3, 2024
0eb4f8f
Make 'data' a required parameter
seisman Oct 3, 2024
9891b2c
Fix x2sys_cross as pd.DataFrame is 'vectors' kind now
seisman Oct 3, 2024
a9d094c
Fix legend as now 'vectors' doesn't mean data is None
seisman Oct 3, 2024
a0e1848
data_kind: data is None and required now returns the 'none' kind
seisman Oct 3, 2024
3d8be4d
Add docstrings for stringio
seisman Oct 3, 2024
7c104a9
Merge branch 'data_kind/return' into refactor/data_kind
seisman Oct 4, 2024
5790923
Merge branch 'main' into refactor/data_kind
seisman Oct 7, 2024
6954c5d
Fix docstrings
seisman Oct 7, 2024
ddda3b9
Merge branch 'refactor/data_kind' into data_kind/vectors-none
seisman Oct 7, 2024
9300ca3
Merge branch 'main' into refactor/data_kind
seisman Oct 7, 2024
2701a4a
Merge branch 'main' into refactor/data_kind
seisman Oct 8, 2024
c3cb459
Merge branch 'refactor/data_kind' into data_kind/vectors-none
seisman Oct 8, 2024
7fcf57f
Merge branch 'main' into refactor/data_kind
seisman Oct 10, 2024
91eb1b6
Merge branch 'refactor/data_kind' into data_kind/vectors-none
seisman Oct 10, 2024
a1e67d3
Rename 'none' kind to 'empty'
seisman Oct 11, 2024
991f688
Merge branch 'main' into refactor/data_kind
seisman Oct 11, 2024
c83c9f8
Merge branch 'refactor/data_kind' into data_kind/vectors-none
seisman Oct 11, 2024
ea9ddaa
Update pygmt/helpers/utils.py
seisman Oct 14, 2024
6f55375
clib: Switch the order of if-conditions to improve the Session.call_m…
seisman Oct 11, 2024
3252988
Fix the conversion error for pandas.Series with missing values in pan…
seisman Oct 11, 2024
003d8a1
Add type hints for GMT anchor codes (#3459)
seisman Oct 11, 2024
edf80c0
clib.Session: Add type hints and reformat docstrings (part 1) (#3504)
seisman Oct 11, 2024
fde7901
clib.conversion: Add type hints and improve docstrings for dataarray_…
seisman Oct 11, 2024
78fdfb1
Update pygmt/helpers/utils.py
seisman Oct 14, 2024
ebb3257
Merge branch 'main' into data_kind/vectors-none
seisman Oct 16, 2024
e2b47b6
Merge remote-tracking branch 'origin/data_kind/vectors-none' into dat…
seisman Oct 16, 2024
a469acf
Fix docstrings
seisman Oct 16, 2024
74bc161
Remove duplicated doctest
seisman Oct 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,24 +1767,22 @@ def virtualfile_in( # noqa: PLR0912
if check_kind == "raster":
valid_kinds += ("grid", "image")
elif check_kind == "vector":
valid_kinds += ("matrix", "vectors", "geojson")
valid_kinds += ("empty", "matrix", "vectors", "geojson")
if kind not in valid_kinds:
raise GMTInvalidInput(
f"Unrecognized data type for {check_kind}: {type(data)}"
)

# Decide which virtualfile_from_ function to use
_virtualfile_from = {
"file": contextlib.nullcontext,
"arg": contextlib.nullcontext,
"empty": self.virtualfile_from_vectors,
"file": contextlib.nullcontext,
"geojson": tempfile_from_geojson,
"grid": self.virtualfile_from_grid,
"image": tempfile_from_image,
"stringio": self.virtualfile_from_stringio,
# Note: virtualfile_from_matrix is not used because a matrix can be
# converted to vectors instead, and using vectors allows for better
# handling of string type inputs (e.g. for datetime data types)
"matrix": self.virtualfile_from_vectors,
"matrix": self.virtualfile_from_matrix,
"vectors": self.virtualfile_from_vectors,
}[kind]

Expand All @@ -1800,30 +1798,33 @@ def virtualfile_in( # noqa: PLR0912
)
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
_data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),)
elif kind == "vectors":
elif kind == "empty":
# data is None, so data must be given via x/y/z.
_data = [x, y]
if z is not None:
_data.append(z)
if extra_arrays:
_data.extend(extra_arrays)
elif kind == "matrix": # turn 2-D arrays into list of vectors
elif kind == "vectors":
if hasattr(data, "items") and not hasattr(data, "to_frame"):
# pandas.DataFrame or xarray.Dataset types.
# pandas.Series will be handled below like a 1-D numpy.ndarray.
_data = [array for _, array in data.items()]
elif hasattr(data, "ndim") and data.ndim == 2 and data.dtype.kind in "iuf":
# Just use virtualfile_from_matrix for 2-D numpy.ndarray
# which are signed integer (i), unsigned integer (u) or
# floating point (f) types
_virtualfile_from = self.virtualfile_from_matrix
_data = (data,)
else:
# Python list, tuple, numpy.ndarray, and pandas.Series types
_data = np.atleast_2d(np.asanyarray(data).T)
elif kind == "matrix":
# GMT can only accept a 2-D matrix of signed integer (i), unsigned integer (u)
# or floating point (f) type. For other data types, we need to use
# virtualfile_from_vectors instead, which turns the matrix into a list of
# vectors and allows for better handling of string type inputs (e.g. for
# datetime data types).
_data = (data,)
if data.dtype.kind not in "iuf":
_virtualfile_from = self.virtualfile_from_vectors

# Finally create the virtualfile from the data, to be passed into GMT
file_context = _virtualfile_from(*_data)

return file_context

def virtualfile_from_data(
Expand Down
45 changes: 28 additions & 17 deletions pygmt/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from collections.abc import Iterable, Mapping, Sequence
from typing import Any, Literal

import numpy as np
import xarray as xr
from pygmt.encodings import charset
from pygmt.exceptions import GMTInvalidInput
Expand Down Expand Up @@ -190,7 +191,7 @@ def _check_encoding(
def data_kind(
data: Any, required: bool = True
) -> Literal[
"arg", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors"
"arg", "empty", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors"
]:
r"""
Check the kind of data that is provided to a module.
Expand All @@ -200,15 +201,20 @@ def data_kind(

- ``"arg"``: ``data`` is ``None`` and ``required=False``, or bool, int, float,
representing an optional argument, used for dealing with optional virtual files
- ``"empty"``: ``data`` is ``None`` and ``required=True``. It means the data is given
via a series of vectors like x/y/z
- ``"file"``: a string or a :class:`pathlib.PurePath` object or a sequence of them,
representing one or more file names
- ``"geojson"``: a geo-like Python object that implements ``__geo_interface__``
(e.g., geopandas.GeoDataFrame or shapely.geometry)
- ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D
- ``"image"``: a 3-D :class:`xarray.DataArray` object
- ``"stringio"``: a :class:`io.StringIO` object
- ``"matrix"``: anything else that is not ``None``
- ``"vectors"``: ``data`` is ``None`` and ``required=True``
- ``"matrix"``: a 2-D :class:`numpy.ndarray` object
- ``"vectors"``: any unrecognized data. Common data types include a
:class:`pandas.DataFrame` object, a dictionary with array-like values, a 1-D/3-D
:class:`numpy.ndarray` object, or array-like objects.

Parameters
----------
Expand Down Expand Up @@ -238,6 +244,11 @@ def data_kind(
>>> data_kind(data=None, required=False)
'arg'

The "empty" kind:

>>> data_kind(data=None)
'empty'
seisman marked this conversation as resolved.
Show resolved Hide resolved

The "file" kind:

>>> [data_kind(data=data) for data in ("file.txt", ("file1.txt", "file2.txt"))]
Expand Down Expand Up @@ -268,31 +279,31 @@ def data_kind(

The "matrix" kind:

>>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray
'matrix'
>>> data_kind(data=np.arange(10).reshape((5, 2))) # 2-D numpy.ndarray
'matrix'

The "vectors" kind:

>>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray
'vectors'
>>> data_kind(data=np.arange(60).reshape((3, 4, 5))) # 3-D numpy.ndarray
'matrix'
'vectors'
>>> data_kind(xr.DataArray(np.arange(12), name="x").to_dataset()) # xarray.Dataset
'matrix'
'vectors'
>>> data_kind(data=[1, 2, 3]) # 1-D sequence
'matrix'
'vectors'
>>> data_kind(data=[[1, 2, 3], [4, 5, 6]]) # sequence of sequences
'matrix'
'vectors'
>>> data_kind(data={"x": [1, 2, 3], "y": [4, 5, 6]}) # dictionary
'matrix'
'vectors'
>>> data_kind(data=pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})) # pd.DataFrame
'matrix'
'vectors'
>>> data_kind(data=pd.Series([1, 2, 3], name="x")) # pd.Series
'matrix'

The "vectors" kind:

>>> data_kind(data=None)
'vectors'
"""
match data:
case None if required: # No data provided and required=True.
kind = "empty"
case str() | pathlib.PurePath(): # One file.
kind = "file"
case list() | tuple() if all(
Expand All @@ -312,7 +323,7 @@ def data_kind(
# geopandas.GeoDataFrame or shapely.geometry).
# Reference: https://gist.github.com/sgillies/2217756
kind = "geojson"
case x if x is not None: # Any not-None is considered as a matrix.
case np.ndarray() if data.ndim == 2: # A 2-D numpy.ndarray object.
kind = "matrix"
case _: # Fall back to "vectors" for any other unrecognized data.
kind = "vectors"
Expand Down
2 changes: 1 addition & 1 deletion pygmt/src/legend.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def legend(
kwargs["F"] = box

kind = data_kind(spec)
if kind not in {"vectors", "file", "stringio"}: # kind="vectors" means spec is None
if kind not in {"empty", "file", "stringio"}:
raise GMTInvalidInput(f"Unrecognized data type: {type(spec)}")
if kind == "file" and is_nonstr_iter(spec):
raise GMTInvalidInput("Only one legend specification file is allowed.")
Expand Down
2 changes: 1 addition & 1 deletion pygmt/src/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def plot(self, data=None, x=None, y=None, size=None, direction=None, **kwargs):

kind = data_kind(data)
extra_arrays = []
if kind == "vectors": # Add more columns for vectors input
if kind == "empty": # Add more columns for vectors input
# Parameters for vector styles
if (
kwargs.get("S") is not None
Expand Down
2 changes: 1 addition & 1 deletion pygmt/src/plot3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def plot3d(
kind = data_kind(data)
extra_arrays = []

if kind == "vectors": # Add more columns for vectors input
if kind == "empty": # Add more columns for vectors input
# Parameters for vector styles
if (
kwargs.get("S") is not None
Expand Down
4 changes: 2 additions & 2 deletions pygmt/src/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def text_( # noqa: PLR0912
raise GMTInvalidInput("'text' can't be None or array when 'position' is given.")
if textfiles is not None and text is not None:
raise GMTInvalidInput("'text' can't be specified when 'textfiles' is given.")
if kind == "vectors" and text is None:
if kind == "empty" and text is None:
raise GMTInvalidInput("Must provide text with x/y pairs.")

# Arguments that can accept arrays.
Expand All @@ -220,7 +220,7 @@ def text_( # noqa: PLR0912

extra_arrays = []
confdict = {}
if kind == "vectors":
if kind == "empty":
for arg, flag, name in array_args:
if is_nonstr_iter(arg):
kwargs["F"] += flag
Expand Down
2 changes: 1 addition & 1 deletion pygmt/src/x2sys_cross.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def x2sys_cross(
match data_kind(track):
case "file":
file_contexts.append(contextlib.nullcontext(track))
case "matrix":
case "vectors":
# find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from
# $X2SYS_HOME/TAGNAME/TAGNAME.tag file
tagfile = Path(
Expand Down