diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index a2b8ef1e355..ee538f7a50e 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -44,6 +44,8 @@ "GMT_IS_SURFACE", ] +METHODS = ["GMT_IS_DUPLICATE", "GMT_IS_REFERENCE"] + MODES = ["GMT_CONTAINER_ONLY", "GMT_IS_OUTPUT"] REGISTRATIONS = ["GMT_GRID_PIXEL_REG", "GMT_GRID_NODE_REG"] @@ -235,7 +237,7 @@ def __getitem__(self, name): value = c_get_enum(session, name.encode()) if value is None or value == -99999: - raise GMTCLibError("Constant '{}' doesn't exits in libgmt.".format(name)) + raise GMTCLibError(f"Constant '{name}' doesn't exist in libgmt.") return value @@ -733,7 +735,7 @@ def put_vector(self, dataset, column, vector): """ Attach a numpy 1D array as a column on a GMT dataset. - Use this functions to attach numpy array data to a GMT dataset and pass + Use this function to attach numpy array data to a GMT dataset and pass it to GMT modules. Wraps ``GMT_Put_Vector``. The dataset must be created by :meth:`~gmt.clib.Session.create_data` @@ -793,11 +795,72 @@ def put_vector(self, dataset, column, vector): ) ) + def put_strings(self, dataset, family, strings): + """ + Attach a numpy 1D array of dtype str as a column on a GMT dataset. + + Use this function to attach string type numpy array data to a GMT + dataset and pass it to GMT modules. Wraps ``GMT_Put_Strings``. + + The dataset must be created by :meth:`~pygmt.clib.Session.create_data` + first. + + .. warning:: + The numpy array must be C contiguous in memory. If it comes from a + column slice of a 2d array, for example, you will have to make a + copy. Use :func:`numpy.ascontiguousarray` to make sure your vector + is contiguous (it won't copy if it already is). + + Parameters + ---------- + dataset : :class:`ctypes.c_void_p` + The ctypes void pointer to a ``GMT_Dataset``. Create it with + :meth:`~pygmt.clib.Session.create_data`. + family : str + The family type of the dataset. Can be either ``GMT_IS_VECTOR`` or + ``GMT_IS_MATRIX``. 
+ strings : numpy 1d-array + The array that will be attached to the dataset. Must be a 1d C + contiguous array. + + Raises + ------ + GMTCLibError + If given invalid input or ``GMT_Put_Strings`` exits with status != + 0. + + """ + c_put_strings = self.get_libgmt_func( + "GMT_Put_Strings", + argtypes=[ + ctp.c_void_p, + ctp.c_uint, + ctp.c_void_p, + ctp.POINTER(ctp.c_char_p), + ], + restype=ctp.c_int, + ) + + strings_pointer = (ctp.c_char_p * len(strings))() + strings_pointer[:] = np.char.encode(strings) + + family_int = self._parse_constant( + family, valid=FAMILIES, valid_modifiers=METHODS + ) + + status = c_put_strings( + self.session_pointer, family_int, dataset, strings_pointer + ) + if status != 0: + raise GMTCLibError( + f"Failed to put strings of type {strings.dtype} into dataset" + ) + def put_matrix(self, dataset, matrix, pad=0): """ Attach a numpy 2D array to a GMT dataset. - Use this functions to attach numpy array data to a GMT dataset and pass + Use this function to attach numpy array data to a GMT dataset and pass it to GMT modules. Wraps ``GMT_Put_Matrix``. The dataset must be created by :meth:`~gmt.clib.Session.create_data` @@ -1002,9 +1065,7 @@ def open_virtual_file(self, family, geometry, direction, data): family_int = self._parse_constant(family, valid=FAMILIES, valid_modifiers=VIAS) geometry_int = self._parse_constant(geometry, valid=GEOMETRIES) direction_int = self._parse_constant( - direction, - valid=["GMT_IN", "GMT_OUT"], - valid_modifiers=["GMT_IS_REFERENCE", "GMT_IS_DUPLICATE"], + direction, valid=["GMT_IN", "GMT_OUT"], valid_modifiers=METHODS, ) buff = ctp.create_string_buffer(self["GMT_VF_LEN"]) @@ -1079,14 +1140,23 @@ def virtualfile_from_vectors(self, *vectors): """ # Conversion to a C-contiguous array needs to be done here and not in - # put_matrix because we need to maintain a reference to the copy while - # it is being used by the C API. Otherwise, the array would be garbage - # collected and the memory freed. 
Creating it in this context manager - # guarantees that the copy will be around until the virtual file is - # closed. The conversion is implicit in vectors_to_arrays. + # put_vector or put_strings because we need to maintain a reference to + # the copy while it is being used by the C API. Otherwise, the array + # would be garbage collected and the memory freed. Creating it in this + # context manager guarantees that the copy will be around until the + # virtual file is closed. The conversion is implicit in + # vectors_to_arrays. arrays = vectors_to_arrays(vectors) columns = len(arrays) + # Find arrays that are of string dtype from column 3 onwards + # Assumes that first 2 columns contain coordinates like longitude, + # latitude, or datetime string types. + for col, array in enumerate(arrays[2:]): + if np.issubdtype(array.dtype, np.str_): + columns = col + 2 + break + rows = len(arrays[0]) if not all(len(i) == rows for i in arrays): raise GMTInvalidInput("All arrays must have same size.") @@ -1098,9 +1168,27 @@ def virtualfile_from_vectors(self, *vectors): family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0] ) - for col, array in enumerate(arrays): + # Use put_vector for columns with numerical type data + for col, array in enumerate(arrays[:columns]): self.put_vector(dataset, column=col, vector=array) + # Use put_strings for last column(s) with string type data + # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings + string_arrays = arrays[columns:] + if string_arrays: + if len(string_arrays) == 1: + strings = string_arrays[0] + else: + # Join row by row in Python; np.apply_along_axis takes its + # output dtype from the first row and would truncate any + # longer joined string. + strings = np.array( + [" ".join(vals) for vals in zip(*string_arrays)] + ) + self.put_strings( + dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=strings + ) + with self.open_virtual_file( family, geometry, "GMT_IN|GMT_IS_REFERENCE", dataset ) as vfile: diff --git a/pygmt/tests/test_clib.py b/pygmt/tests/test_clib.py index b593208af36..a94c2c578bf 100644 --- 
a/pygmt/tests/test_clib.py +++ b/pygmt/tests/test_clib.py @@ -27,6 +27,9 @@ TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") +with clib.Session() as _lib: + gmt_version = Version(_lib.info["version"]) + @contextmanager def mock(session, func, returns=None, mock_func=None): @@ -399,6 +402,47 @@ def test_virtualfile_from_vectors(): assert output == expected +@pytest.mark.xfail( + condition=gmt_version < Version("6.1.1"), + reason="GMT_Put_Strings only works for GMT 6.1.1 and above", +) +def test_virtualfile_from_vectors_one_string_column(): + "Test passing in one column with string dtype into virtual file dataset" + size = 5 + x = np.arange(size, dtype=np.int32) + y = np.arange(size, size * 2, 1, dtype=np.int32) + strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=str) + with clib.Session() as lib: + with lib.virtualfile_from_vectors(x, y, strings) as vfile: + with GMTTempFile() as outfile: + lib.call_module("convert", f"{vfile} ->{outfile.name}") + output = outfile.read(keep_tabs=True) + expected = "".join(f"{i}\t{j}\t{k}\n" for i, j, k in zip(x, y, strings)) + assert output == expected + + +@pytest.mark.xfail( + condition=gmt_version < Version("6.1.1"), + reason="GMT_Put_Strings only works for GMT 6.1.1 and above", +) +def test_virtualfile_from_vectors_two_string_columns(): + "Test passing in two columns of string dtype into virtual file dataset" + size = 5 + x = np.arange(size, dtype=np.int32) + y = np.arange(size, size * 2, 1, dtype=np.int32) + strings1 = np.array(["a", "bc", "def", "ghij", "klmno"], dtype=str) + strings2 = np.array(["pqrst", "uvwx", "yz!", "@#", "$"], dtype=str) + with clib.Session() as lib: + with lib.virtualfile_from_vectors(x, y, strings1, strings2) as vfile: + with GMTTempFile() as outfile: + lib.call_module("convert", f"{vfile} ->{outfile.name}") + output = outfile.read(keep_tabs=True) + expected = "".join( + f"{h}\t{i}\t{j} {k}\n" for h, i, j, k in zip(x, y, strings1, strings2) + ) + assert output == 
expected + + def test_virtualfile_from_vectors_transpose(): "Test transforming matrix columns to virtual file dataset" dtypes = "float32 float64 int32 int64 uint32 uint64".split() diff --git a/pygmt/tests/test_clib_put_strings.py b/pygmt/tests/test_clib_put_strings.py new file mode 100644 index 00000000000..6fc7885901a --- /dev/null +++ b/pygmt/tests/test_clib_put_strings.py @@ -0,0 +1,67 @@ +""" +Test the functions that put string data into GMT. +""" +import numpy as np +import numpy.testing as npt +import pytest +from packaging.version import Version + +from .. import clib +from ..exceptions import GMTCLibError +from ..helpers import GMTTempFile + +with clib.Session() as _lib: + gmt_version = Version(_lib.info["version"]) + + +@pytest.mark.xfail( + condition=gmt_version < Version("6.1.1"), + reason="GMT_Put_Strings only works for GMT 6.1.1 and above", +) +def test_put_strings(): + "Check that assigning a numpy array of dtype str to a dataset works" + with clib.Session() as lib: + dataset = lib.create_data( + family="GMT_IS_DATASET|GMT_VIA_VECTOR", + geometry="GMT_IS_POINT", + mode="GMT_CONTAINER_ONLY", + dim=[2, 5, 1, 0], # columns, rows, layers, dtype + ) + x = np.array([1, 2, 3, 4, 5], dtype=np.int32) + y = np.array([6, 7, 8, 9, 10], dtype=np.int32) + strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=str) + lib.put_vector(dataset, column=lib["GMT_X"], vector=x) + lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) + lib.put_strings( + dataset, family="GMT_IS_VECTOR|GMT_IS_DUPLICATE", strings=strings + ) + # Turns out wesn doesn't matter for Datasets + wesn = [0] * 6 + # Save the data to a file to see if it's being accessed correctly + with GMTTempFile() as tmp_file: + lib.write_data( + "GMT_IS_VECTOR", + "GMT_IS_POINT", + "GMT_WRITE_SET", + wesn, + tmp_file.name, + dataset, + ) + # Load the data and check that it's correct + newx, newy, newstrings = tmp_file.loadtxt( + unpack=True, dtype=[("x", np.int32), ("y", np.int32), ("text", "