From 7138a352a7075a31da6174951c609f434e50e59c Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Tue, 16 Jul 2024 16:40:21 -0400 Subject: [PATCH 01/42] Add new type to header --- gsd/gsd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gsd/gsd.h b/gsd/gsd.h index 194b3750..388e4201 100644 --- a/gsd/gsd.h +++ b/gsd/gsd.h @@ -48,7 +48,10 @@ extern "C" GSD_TYPE_FLOAT, /// 64-bit floating point number. - GSD_TYPE_DOUBLE + GSD_TYPE_DOUBLE, + + /// 8-bit character. + GSD_TYPE_CHARACTER }; /// Flag for GSD file open options From 4ebc9bb4303490308beeeee66cbc92c90f8901a7 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Tue, 16 Jul 2024 16:41:58 -0400 Subject: [PATCH 02/42] Add new type to gsd_sizeof_type --- gsd/gsd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gsd/gsd.c b/gsd/gsd.c index f20e0f02..2ec94fd0 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -2335,6 +2335,10 @@ size_t gsd_sizeof_type(enum gsd_type type) { val = sizeof(double); } + else if (type == GSD_TYPE_CHARACTER) + { + val = sizeof(char); + } else { return 0; From b11cce5d854cb017d69f1461a5a7818c083cc4af Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Mon, 16 Sep 2024 10:33:28 -0400 Subject: [PATCH 03/42] Add new type to cython files --- gsd/fl.pyx | 142 +++++++++++++++++++++++++++++++++---------------- gsd/libgsd.pxd | 1 + 2 files changed, 98 insertions(+), 45 deletions(-) diff --git a/gsd/fl.pyx b/gsd/fl.pyx index 03849449..760e1f6f 100644 --- a/gsd/fl.pyx +++ b/gsd/fl.pyx @@ -22,6 +22,7 @@ from libc.stdint cimport uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t,\ from libc.errno cimport errno cimport gsd.libgsd as libgsd cimport numpy +cimport cython logger = logging.getLogger('gsd.fl') @@ -145,6 +146,21 @@ cdef void * __get_ptr_float64(data): else: return &data_array_float64[0, 0] +cdef void * __get_ptr_char(data): + # cdef numpy.ndarray[numpy.str_, ndim=2, mode="c", cast=True] data_array_char + cdef size_t address + address = data.__array_interface__["data"][0] 
+ # data_array_char: cython.char[::1] = data.reshape(data.size) + # data_array_char = data + # cdef char *data_array_char + # data_array_char = cython.address(data.data[0]) + if (data.size == 0): + return NULL + else: + # return cython.address(data_array_char.data[0]) + # return cython.address(data_array_char[0]) + return address + def open(name, mode, application=None, schema=None, schema_version=None): """open(name, mode, application=None, schema=None, schema_version=None) @@ -557,59 +573,84 @@ cdef class GSDFile: if not self.__is_open: raise ValueError("File is not open") - data_array = numpy.ascontiguousarray(data) - if data_array is not data: - logger.warning('implicit data copy when writing chunk: ' + name) - data_array = data_array.view() - cdef uint64_t N cdef uint32_t M - if len(data_array.shape) > 2: - raise ValueError("GSD can only write 1 or 2 dimensional arrays: " - + name) + cdef libgsd.gsd_type gsd_type + cdef void *data_ptr - if len(data_array.shape) == 1: - data_array = data_array.reshape([data_array.shape[0], 1]) + # Special behavior for handling strings + if type(data) is str: + bytes_array = numpy.array([data], dtype=numpy.dtype((bytes, len(data)))) + bytes_view = bytes_array.view(dtype=numpy.int8).reshape((len(data),1)) - N = data_array.shape[0] - M = data_array.shape[1] + N = len(data) + M = 1 - cdef libgsd.gsd_type gsd_type - cdef void *data_ptr - if data_array.dtype == numpy.uint8: - gsd_type = libgsd.GSD_TYPE_UINT8 - data_ptr = __get_ptr_uint8(data_array) - elif data_array.dtype == numpy.uint16: - gsd_type = libgsd.GSD_TYPE_UINT16 - data_ptr = __get_ptr_uint16(data_array) - elif data_array.dtype == numpy.uint32: - gsd_type = libgsd.GSD_TYPE_UINT32 - data_ptr = __get_ptr_uint32(data_array) - elif data_array.dtype == numpy.uint64: - gsd_type = libgsd.GSD_TYPE_UINT64 - data_ptr = __get_ptr_uint64(data_array) - elif data_array.dtype == numpy.int8: - gsd_type = libgsd.GSD_TYPE_INT8 - data_ptr = __get_ptr_int8(data_array) - elif data_array.dtype 
== numpy.int16: - gsd_type = libgsd.GSD_TYPE_INT16 - data_ptr = __get_ptr_int16(data_array) - elif data_array.dtype == numpy.int32: - gsd_type = libgsd.GSD_TYPE_INT32 - data_ptr = __get_ptr_int32(data_array) - elif data_array.dtype == numpy.int64: - gsd_type = libgsd.GSD_TYPE_INT64 - data_ptr = __get_ptr_int64(data_array) - elif data_array.dtype == numpy.float32: - gsd_type = libgsd.GSD_TYPE_FLOAT - data_ptr = __get_ptr_float32(data_array) - elif data_array.dtype == numpy.float64: - gsd_type = libgsd.GSD_TYPE_DOUBLE - data_ptr = __get_ptr_float64(data_array) + gsd_type = libgsd.GSD_TYPE_CHARACTER + data_ptr = __get_ptr_int8(bytes_view) + + # Non-string behavior else: - raise ValueError("invalid type for chunk: " + name) + data_array = numpy.ascontiguousarray(data) + + if data_array is not data: + logger.warning('implicit data copy when writing chunk: ' + name) + data_array = data_array.view() + + + + if len(data_array.shape) > 2: + raise ValueError("GSD can only write 1 or 2 dimensional arrays: " + + name) + + if len(data_array.shape) == 1: + data_array = data_array.reshape([data_array.shape[0], 1]) + N = data_array.shape[0] + M = data_array.shape[1] + + if data_array.dtype == numpy.uint8: + gsd_type = libgsd.GSD_TYPE_UINT8 + data_ptr = __get_ptr_uint8(data_array) + elif data_array.dtype == numpy.uint16: + gsd_type = libgsd.GSD_TYPE_UINT16 + data_ptr = __get_ptr_uint16(data_array) + elif data_array.dtype == numpy.uint32: + gsd_type = libgsd.GSD_TYPE_UINT32 + data_ptr = __get_ptr_uint32(data_array) + elif data_array.dtype == numpy.uint64: + gsd_type = libgsd.GSD_TYPE_UINT64 + data_ptr = __get_ptr_uint64(data_array) + elif data_array.dtype == numpy.int8: + gsd_type = libgsd.GSD_TYPE_INT8 + data_ptr = __get_ptr_int8(data_array) + elif data_array.dtype == numpy.int16: + gsd_type = libgsd.GSD_TYPE_INT16 + data_ptr = __get_ptr_int16(data_array) + elif data_array.dtype == numpy.int32: + gsd_type = libgsd.GSD_TYPE_INT32 + data_ptr = __get_ptr_int32(data_array) + elif 
data_array.dtype == numpy.int64: + gsd_type = libgsd.GSD_TYPE_INT64 + data_ptr = __get_ptr_int64(data_array) + elif data_array.dtype == numpy.float32: + gsd_type = libgsd.GSD_TYPE_FLOAT + data_ptr = __get_ptr_float32(data_array) + elif data_array.dtype == numpy.float64: + gsd_type = libgsd.GSD_TYPE_DOUBLE + data_ptr = __get_ptr_float64(data_array) + # elif data_array.dtype.type is numpy.str_: + # if N != 0: + # M = int(data_array.dtype.itemsize/4) + # print(f"Wrote: {N=}, {M=}, {data_array=}") + # gsd_type = libgsd.GSD_TYPE_CHARACTER + # data_ptr = __get_ptr_char(data_array) + else: + raise ValueError("invalid type for chunk: " + name) + + # Once we have the data pointer, the behavior should be identical + # for all data types logger.debug('write chunk: ' + self.name + ' - ' + name) cdef char * c_name @@ -787,6 +828,9 @@ cdef class GSDFile: elif gsd_type == libgsd.GSD_TYPE_DOUBLE: data_array = numpy.empty(dtype=numpy.float64, shape=[index_entry.N, index_entry.M]) + elif gsd_type == libgsd.GSD_TYPE_CHARACTER: + data_array = numpy.empty(dtype=numpy.int8, + shape=[index_entry.M, index_entry.N]) else: raise ValueError("invalid type for chunk: " + name) @@ -815,6 +859,8 @@ cdef class GSDFile: data_ptr = __get_ptr_float32(data_array) elif gsd_type == libgsd.GSD_TYPE_DOUBLE: data_ptr = __get_ptr_float64(data_array) + elif gsd_type == libgsd.GSD_TYPE_CHARACTER: + data_ptr = __get_ptr_int8(data_array) else: raise ValueError("invalid type for chunk: " + name) @@ -826,6 +872,12 @@ cdef class GSDFile: __raise_on_error(retval, self.name) if index_entry.M == 1: + if gsd_type == libgsd.GSD_TYPE_CHARACTER: + data_array = data_array.flatten() + bytes_array = data_array.view(dtype=numpy.dtype((bytes, data_array.shape[0]))) + print(bytes_array[0].decode("UTF-8")) + return bytes_array[0].decode("UTF-8") + return data_array.reshape([index_entry.N]) else: return data_array diff --git a/gsd/libgsd.pxd b/gsd/libgsd.pxd index e8bb79d8..1d42d078 100644 --- a/gsd/libgsd.pxd +++ 
b/gsd/libgsd.pxd @@ -16,6 +16,7 @@ cdef extern from "gsd.h" nogil: GSD_TYPE_INT64 GSD_TYPE_FLOAT GSD_TYPE_DOUBLE + GSD_TYPE_CHARACTER cdef enum gsd_open_flag: GSD_OPEN_READWRITE=1 From 06698117f1d4ba191c73bb855e9f2cd5c2ce7362 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Mon, 16 Sep 2024 10:33:58 -0400 Subject: [PATCH 04/42] Add new type to pygsd --- gsd/pygsd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gsd/pygsd.py b/gsd/pygsd.py index f21c1177..00ef0145 100644 --- a/gsd/pygsd.py +++ b/gsd/pygsd.py @@ -63,6 +63,7 @@ 8: numpy.dtype('int64'), 9: numpy.dtype('float32'), 10: numpy.dtype('float64'), + 11: numpy.dtype('str_'), } @@ -352,7 +353,7 @@ def read_chunk(self, frame, name): if len(data_raw) != size: raise OSError - + # TODO: if gsd type is character, decode it here data_npy = numpy.frombuffer(data_raw, dtype=gsd_type_mapping[chunk.type]) if chunk.M == 1: From 6b2fdf719a47120ba722fe44b4b504029c965bc7 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Mon, 16 Sep 2024 10:34:23 -0400 Subject: [PATCH 05/42] Create string test for fl --- gsd/test/test_fl.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/gsd/test/test_fl.py b/gsd/test/test_fl.py index 57a18484..9b1b4679 100644 --- a/gsd/test/test_fl.py +++ b/gsd/test/test_fl.py @@ -43,12 +43,16 @@ def test_create(tmp_path, open_mode): numpy.int64, numpy.float32, numpy.float64, + numpy.str_, ], ) def test_dtype(tmp_path, typ): """Test all supported data types.""" - data1d = numpy.array([1, 2, 3, 4, 5, 127], dtype=typ) - data2d = numpy.array([[10, 20], [30, 40], [50, 80]], dtype=typ) + if typ is numpy.str_: + data1d = "test" + else: + data1d = numpy.array([950, 200, 3, 4, 5, 6, 7], dtype=typ) + data2d = numpy.array([[10, 20], [30, 40], [50, 80]], dtype=typ) # <-- don't use this with str data_zero = numpy.array([], dtype=typ) gsd.fl.open( @@ -67,8 +71,9 @@ def test_dtype(tmp_path, typ): schema_version=[1, 2], ) as f: 
f.write_chunk(name='data1d', data=data1d) - f.write_chunk(name='data2d', data=data2d) - f.write_chunk(name='data_zero', data=data_zero) + if typ is not numpy.str_: + f.write_chunk(name='data2d', data=data2d) + f.write_chunk(name='data_zero', data=data_zero) f.end_frame() with gsd.fl.open( @@ -79,24 +84,28 @@ def test_dtype(tmp_path, typ): schema_version=[1, 2], ) as f: read_data1d = f.read_chunk(frame=0, name='data1d') - read_data2d = f.read_chunk(frame=0, name='data2d') - read_data_zero = f.read_chunk(frame=0, name='data_zero') - - assert data1d.dtype == read_data1d.dtype + if typ is not numpy.str_: + read_data2d = f.read_chunk(frame=0, name='data2d') + read_data_zero = f.read_chunk(frame=0, name='data_zero') + + if typ != numpy.str_: + assert data1d.dtype.type == read_data1d.dtype.type + assert data2d.dtype.type == read_data2d.dtype.type + numpy.testing.assert_array_equal(data2d, read_data2d) + assert data_zero.dtype.type == read_data_zero.dtype.type + assert data_zero.shape == (0,) + numpy.testing.assert_array_equal(data1d, read_data1d) - assert data2d.dtype == read_data2d.dtype - numpy.testing.assert_array_equal(data2d, read_data2d) - assert data_zero.dtype == read_data_zero.dtype - assert data_zero.shape == (0,) + # test again with pygsd with gsd.pygsd.GSDFile(file=open(str(tmp_path / 'test_dtype.gsd'), mode='rb')) as f: read_data1d = f.read_chunk(frame=0, name='data1d') read_data2d = f.read_chunk(frame=0, name='data2d') - assert data1d.dtype == read_data1d.dtype + assert data1d.dtype.type == read_data1d.dtype.type numpy.testing.assert_array_equal(data1d, read_data1d) - assert data2d.dtype == read_data2d.dtype + assert data2d.dtype.type == read_data2d.dtype.type numpy.testing.assert_array_equal(data2d, read_data2d) From 4c44b4fe93235999b97143e7ced4fda35ff516f1 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Mon, 16 Sep 2024 12:25:06 -0400 Subject: [PATCH 06/42] Split dtype tests into string and nonstring --- gsd/test/test_fl.py | 77 
+++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/gsd/test/test_fl.py b/gsd/test/test_fl.py index 9b1b4679..69fdd0c1 100644 --- a/gsd/test/test_fl.py +++ b/gsd/test/test_fl.py @@ -43,16 +43,12 @@ def test_create(tmp_path, open_mode): numpy.int64, numpy.float32, numpy.float64, - numpy.str_, ], ) -def test_dtype(tmp_path, typ): - """Test all supported data types.""" - if typ is numpy.str_: - data1d = "test" - else: - data1d = numpy.array([950, 200, 3, 4, 5, 6, 7], dtype=typ) - data2d = numpy.array([[10, 20], [30, 40], [50, 80]], dtype=typ) # <-- don't use this with str +def test_nonstring_dtypes(tmp_path, typ): + """Test all supported data types except for strings.""" + data1d = numpy.array([1, 2, 3, 4, 5, 127], dtype=typ) + data2d = numpy.array([[10, 20], [30, 40], [50, 80]], dtype=typ) data_zero = numpy.array([], dtype=typ) gsd.fl.open( @@ -71,9 +67,8 @@ def test_dtype(tmp_path, typ): schema_version=[1, 2], ) as f: f.write_chunk(name='data1d', data=data1d) - if typ is not numpy.str_: - f.write_chunk(name='data2d', data=data2d) - f.write_chunk(name='data_zero', data=data_zero) + f.write_chunk(name='data2d', data=data2d) + f.write_chunk(name='data_zero', data=data_zero) f.end_frame() with gsd.fl.open( @@ -84,19 +79,15 @@ def test_dtype(tmp_path, typ): schema_version=[1, 2], ) as f: read_data1d = f.read_chunk(frame=0, name='data1d') - if typ is not numpy.str_: - read_data2d = f.read_chunk(frame=0, name='data2d') - read_data_zero = f.read_chunk(frame=0, name='data_zero') - - if typ != numpy.str_: - assert data1d.dtype.type == read_data1d.dtype.type - assert data2d.dtype.type == read_data2d.dtype.type - numpy.testing.assert_array_equal(data2d, read_data2d) - assert data_zero.dtype.type == read_data_zero.dtype.type - assert data_zero.shape == (0,) - - numpy.testing.assert_array_equal(data1d, read_data1d) + read_data2d = f.read_chunk(frame=0, name='data2d') + read_data_zero = f.read_chunk(frame=0, 
name='data_zero') + assert data1d.dtype.type == read_data1d.dtype.type + numpy.testing.assert_array_equal(data1d, read_data1d) + assert data2d.dtype.type == read_data2d.dtype.type + numpy.testing.assert_array_equal(data2d, read_data2d) + assert data_zero.dtype.type == read_data_zero.dtype.type + assert data_zero.shape == (0,) # test again with pygsd with gsd.pygsd.GSDFile(file=open(str(tmp_path / 'test_dtype.gsd'), mode='rb')) as f: @@ -108,6 +99,46 @@ def test_dtype(tmp_path, typ): assert data2d.dtype.type == read_data2d.dtype.type numpy.testing.assert_array_equal(data2d, read_data2d) +def test_string_dtype(tmp_path): + """Test string datatype. + + Note that the string datatype does not support 0-D or 2-D data.""" + data1d = "test" + + gsd.fl.open( + mode='x', + name=tmp_path / 'test_dtype.gsd', + application='test_dtype', + schema='none', + schema_version=[1, 2], + ) + + with gsd.fl.open( + name=tmp_path / 'test_dtype.gsd', + mode='w', + application='test_dtype', + schema='none', + schema_version=[1, 2], + ) as f: + f.write_chunk(name='data1d', data=data1d) + f.end_frame() + + with gsd.fl.open( + name=tmp_path / 'test_dtype.gsd', + mode='r', + application='test_dtype', + schema='none', + schema_version=[1, 2], + ) as f: + read_data1d = f.read_chunk(frame=0, name='data1d') + + numpy.testing.assert_string_equal(data1d, read_data1d) + + # test again with pygsd + with gsd.pygsd.GSDFile(file=open(str(tmp_path / 'test_dtype.gsd'), mode='rb')) as f: + read_data1d = f.read_chunk(frame=0, name='data1d') + + numpy.testing.assert_string_equal(data1d, read_data1d) def test_metadata(tmp_path, open_mode): """Test file metadata.""" From 45c0e961d818987ba941554f05b6dfa923d33bd7 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Mon, 16 Sep 2024 12:25:25 -0400 Subject: [PATCH 07/42] Add string decoding to pygsd --- gsd/pygsd.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gsd/pygsd.py b/gsd/pygsd.py index 9ba5e7ea..ea1cf771 100644 --- 
a/gsd/pygsd.py +++ b/gsd/pygsd.py @@ -63,7 +63,7 @@ 8: numpy.dtype('int64'), 9: numpy.dtype('float32'), 10: numpy.dtype('float64'), - 11: numpy.dtype('str_'), + 11: numpy.dtype('int8'), # used for strings } @@ -353,8 +353,12 @@ def read_chunk(self, frame, name): if len(data_raw) != size: raise OSError - # TODO: if gsd type is character, decode it here - data_npy = numpy.frombuffer(data_raw, dtype=gsd_type_mapping[chunk.type]) + + # If gsd type is character, decode it here + if chunk.type == 11: + data_npy = data_raw.decode('utf-8') + else: + data_npy = numpy.frombuffer(data_raw, dtype=gsd_type_mapping[chunk.type]) if chunk.M == 1: return data_npy From 21a63eff32261c4aa7d712c1e90a957136c245bf Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Mon, 16 Sep 2024 12:26:37 -0400 Subject: [PATCH 08/42] Add note about strings to the file layer docs --- doc/file-layer.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/file-layer.rst b/doc/file-layer.rst index 78431216..3f416a2b 100644 --- a/doc/file-layer.rst +++ b/doc/file-layer.rst @@ -235,3 +235,6 @@ Data block A data block stores raw data bytes on the disk. For a given index entry ``entry``, the data starts at location ``entry.location`` and is the next ``entry.N * entry.M * gsd_sizeof_type(entry.type)`` bytes. + +The type fields are self-explanatory except for UTF-8 strings, for which +null-termination is allowed but not required. 
\ No newline at end of file From 58c16a4c6a485542b642c85e7f541c60de21b080 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:25:36 +0000 Subject: [PATCH 09/42] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/file-layer.rst | 2 +- gsd/fl.pyx | 6 +++--- gsd/pygsd.py | 4 ++-- gsd/test/test_fl.py | 6 ++++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/doc/file-layer.rst b/doc/file-layer.rst index 3f416a2b..96c1b50d 100644 --- a/doc/file-layer.rst +++ b/doc/file-layer.rst @@ -237,4 +237,4 @@ A data block stores raw data bytes on the disk. For a given index entry ``entry.N * entry.M * gsd_sizeof_type(entry.type)`` bytes. The type fields are self-explanatory except for UTF-8 strings, for which -null-termination is allowed but not required. \ No newline at end of file +null-termination is allowed but not required. diff --git a/gsd/fl.pyx b/gsd/fl.pyx index 760e1f6f..cb9bbc16 100644 --- a/gsd/fl.pyx +++ b/gsd/fl.pyx @@ -148,8 +148,8 @@ cdef void * __get_ptr_float64(data): cdef void * __get_ptr_char(data): # cdef numpy.ndarray[numpy.str_, ndim=2, mode="c", cast=True] data_array_char - cdef size_t address - address = data.__array_interface__["data"][0] + cdef size_t address + address = data.__array_interface__["data"][0] # data_array_char: cython.char[::1] = data.reshape(data.size) # data_array_char = data # cdef char *data_array_char @@ -877,7 +877,7 @@ cdef class GSDFile: bytes_array = data_array.view(dtype=numpy.dtype((bytes, data_array.shape[0]))) print(bytes_array[0].decode("UTF-8")) return bytes_array[0].decode("UTF-8") - + return data_array.reshape([index_entry.N]) else: return data_array diff --git a/gsd/pygsd.py b/gsd/pygsd.py index ea1cf771..00a750ae 100644 --- a/gsd/pygsd.py +++ b/gsd/pygsd.py @@ -63,7 +63,7 @@ 8: numpy.dtype('int64'), 9: numpy.dtype('float32'), 10: numpy.dtype('float64'), - 11: 
numpy.dtype('int8'), # used for strings + 11: numpy.dtype('int8'), # used for strings } @@ -353,7 +353,7 @@ def read_chunk(self, frame, name): if len(data_raw) != size: raise OSError - + # If gsd type is character, decode it here if chunk.type == 11: data_npy = data_raw.decode('utf-8') diff --git a/gsd/test/test_fl.py b/gsd/test/test_fl.py index 69fdd0c1..844db625 100644 --- a/gsd/test/test_fl.py +++ b/gsd/test/test_fl.py @@ -99,11 +99,12 @@ def test_nonstring_dtypes(tmp_path, typ): assert data2d.dtype.type == read_data2d.dtype.type numpy.testing.assert_array_equal(data2d, read_data2d) + def test_string_dtype(tmp_path): """Test string datatype. - + Note that the string datatype does not support 0-D or 2-D data.""" - data1d = "test" + data1d = 'test' gsd.fl.open( mode='x', @@ -140,6 +141,7 @@ def test_string_dtype(tmp_path): numpy.testing.assert_string_equal(data1d, read_data1d) + def test_metadata(tmp_path, open_mode): """Test file metadata.""" data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) From e12a9a240b1552a8bf28cecbd3c66dc7ce75973b Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Wed, 18 Sep 2024 09:34:52 -0400 Subject: [PATCH 10/42] Revise docstring for pre-commit --- gsd/test/test_fl.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gsd/test/test_fl.py b/gsd/test/test_fl.py index 844db625..5ec32a30 100644 --- a/gsd/test/test_fl.py +++ b/gsd/test/test_fl.py @@ -102,9 +102,10 @@ def test_nonstring_dtypes(tmp_path, typ): def test_string_dtype(tmp_path): """Test string datatype. - - Note that the string datatype does not support 0-D or 2-D data.""" - data1d = 'test' + + Note that the string datatype does not support 0-D or 2-D data. 
+ """ + data1d = "test" gsd.fl.open( mode='x', From d1ad023f64f9c557cae649a9fdc4a5807590cc68 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Wed, 18 Sep 2024 10:23:36 -0400 Subject: [PATCH 11/42] Replace type mapping dict with enum using class method --- gsd/pygsd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gsd/pygsd.py b/gsd/pygsd.py index 00a750ae..235539ea 100644 --- a/gsd/pygsd.py +++ b/gsd/pygsd.py @@ -29,6 +29,7 @@ """ +from enum import Enum import logging import struct import sys From a7aa0c2b356e066bcaf953266feef820f50f1b81 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Wed, 18 Sep 2024 10:24:13 -0400 Subject: [PATCH 12/42] Switch enum to use functional approach --- gsd/pygsd.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/gsd/pygsd.py b/gsd/pygsd.py index 235539ea..f2546aba 100644 --- a/gsd/pygsd.py +++ b/gsd/pygsd.py @@ -53,19 +53,22 @@ gsd_index_entry = namedtuple('gsd_index_entry', 'frame N location M id type flags') gsd_index_entry_struct = struct.Struct('QQqIHBB') -gsd_type_mapping = { - 1: numpy.dtype('uint8'), - 2: numpy.dtype('uint16'), - 3: numpy.dtype('uint32'), - 4: numpy.dtype('uint64'), - 5: numpy.dtype('int8'), - 6: numpy.dtype('int16'), - 7: numpy.dtype('int32'), - 8: numpy.dtype('int64'), - 9: numpy.dtype('float32'), - 10: numpy.dtype('float64'), - 11: numpy.dtype('int8'), # used for strings -} +gsd_type_mapping = Enum( + "gsd_type", + [ + ["uint8", numpy.dtype('uint8')], + ["uint16", numpy.dtype('uint16')], + ["uint32", numpy.dtype('uint32')], + ["uint64", numpy.dtype('uint64')], + ["int8", numpy.dtype('int8')], + ["int16", numpy.dtype('int16')], + ["int32", numpy.dtype('int32')], + ["int64", numpy.dtype('int64')], + ["float32", numpy.dtype('float32')], + ["float64", numpy.dtype('float64')], + ["str", numpy.dtype('int8')], # used for strings + ] +) class GSDFile: From 4c4099cb679f4a56f44e67eefe16b157af533cf8 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 
Sep 2024 08:54:01 -0400 Subject: [PATCH 13/42] Replace Enum with map of tuples --- gsd/pygsd.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/gsd/pygsd.py b/gsd/pygsd.py index f2546aba..c8f54d8e 100644 --- a/gsd/pygsd.py +++ b/gsd/pygsd.py @@ -53,22 +53,19 @@ gsd_index_entry = namedtuple('gsd_index_entry', 'frame N location M id type flags') gsd_index_entry_struct = struct.Struct('QQqIHBB') -gsd_type_mapping = Enum( - "gsd_type", - [ - ["uint8", numpy.dtype('uint8')], - ["uint16", numpy.dtype('uint16')], - ["uint32", numpy.dtype('uint32')], - ["uint64", numpy.dtype('uint64')], - ["int8", numpy.dtype('int8')], - ["int16", numpy.dtype('int16')], - ["int32", numpy.dtype('int32')], - ["int64", numpy.dtype('int64')], - ["float32", numpy.dtype('float32')], - ["float64", numpy.dtype('float64')], - ["str", numpy.dtype('int8')], # used for strings - ] -) +gsd_type_mapping = { + 1: ("uint8", numpy.dtype('uint8')), + 2: ("uint16", numpy.dtype('uint16')), + 3: ("uint32", numpy.dtype('uint32')), + 4: ("uint64", numpy.dtype('uint64')), + 5: ("int8", numpy.dtype('int8')), + 6: ("int16", numpy.dtype('int16')), + 7: ("int32", numpy.dtype('int32')), + 8: ("int64", numpy.dtype('int64')), + 9: ("float32", numpy.dtype('float32')), + 10: ("float64", numpy.dtype('float64')), + 11: ("str", numpy.dtype('int8')), +} class GSDFile: @@ -338,7 +335,7 @@ def read_chunk(self, frame, name): 'read chunk: ' + str(self.__file) + ' - ' + str(frame) + ' - ' + name ) - size = chunk.N * chunk.M * gsd_type_mapping[chunk.type].itemsize + size = chunk.N * chunk.M * gsd_type_mapping[chunk.type][1].itemsize if chunk.location == 0: raise RuntimeError( 'Corrupt chunk: ' @@ -350,7 +347,7 @@ def read_chunk(self, frame, name): ) if size == 0: - return numpy.array([], dtype=gsd_type_mapping[chunk.type]) + return numpy.array([], dtype=gsd_type_mapping[chunk.type][1]) self.__file.seek(chunk.location, 0) data_raw = self.__file.read(size) @@ -359,10 +356,10 
@@ def read_chunk(self, frame, name): raise OSError # If gsd type is character, decode it here - if chunk.type == 11: + if gsd_type_mapping[chunk.type][0] == "str": data_npy = data_raw.decode('utf-8') else: - data_npy = numpy.frombuffer(data_raw, dtype=gsd_type_mapping[chunk.type]) + data_npy = numpy.frombuffer(data_raw, dtype=gsd_type_mapping[chunk.type][1]) if chunk.M == 1: return data_npy From 45a6ce86d4376c877b3cf1622b63ad27c7c86755 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 09:09:40 -0400 Subject: [PATCH 14/42] Fix formatting with pre-commit --- gsd/test/test_fl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gsd/test/test_fl.py b/gsd/test/test_fl.py index 5ec32a30..7ea6d069 100644 --- a/gsd/test/test_fl.py +++ b/gsd/test/test_fl.py @@ -102,10 +102,10 @@ def test_nonstring_dtypes(tmp_path, typ): def test_string_dtype(tmp_path): """Test string datatype. - + Note that the string datatype does not support 0-D or 2-D data. """ - data1d = "test" + data1d = 'test' gsd.fl.open( mode='x', From 0b6ff900dc7d0ce325a79e5c4513147a7dd68830 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 09:09:56 -0400 Subject: [PATCH 15/42] Fix formatting with pre-commit --- gsd/pygsd.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/gsd/pygsd.py b/gsd/pygsd.py index c8f54d8e..e6bcaae1 100644 --- a/gsd/pygsd.py +++ b/gsd/pygsd.py @@ -54,17 +54,17 @@ gsd_index_entry_struct = struct.Struct('QQqIHBB') gsd_type_mapping = { - 1: ("uint8", numpy.dtype('uint8')), - 2: ("uint16", numpy.dtype('uint16')), - 3: ("uint32", numpy.dtype('uint32')), - 4: ("uint64", numpy.dtype('uint64')), - 5: ("int8", numpy.dtype('int8')), - 6: ("int16", numpy.dtype('int16')), - 7: ("int32", numpy.dtype('int32')), - 8: ("int64", numpy.dtype('int64')), - 9: ("float32", numpy.dtype('float32')), - 10: ("float64", numpy.dtype('float64')), - 11: ("str", numpy.dtype('int8')), + 1: ('uint8', numpy.dtype('uint8')), 
+ 2: ('uint16', numpy.dtype('uint16')), + 3: ('uint32', numpy.dtype('uint32')), + 4: ('uint64', numpy.dtype('uint64')), + 5: ('int8', numpy.dtype('int8')), + 6: ('int16', numpy.dtype('int16')), + 7: ('int32', numpy.dtype('int32')), + 8: ('int64', numpy.dtype('int64')), + 9: ('float32', numpy.dtype('float32')), + 10: ('float64', numpy.dtype('float64')), + 11: ('str', numpy.dtype('int8')), } From 385d6a1549868c0521003d1a325cb04ad6ef444f Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 09:10:22 -0400 Subject: [PATCH 16/42] Remove unused import --- gsd/pygsd.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gsd/pygsd.py b/gsd/pygsd.py index e6bcaae1..9e1adfec 100644 --- a/gsd/pygsd.py +++ b/gsd/pygsd.py @@ -29,7 +29,6 @@ """ -from enum import Enum import logging import struct import sys @@ -356,7 +355,7 @@ def read_chunk(self, frame, name): raise OSError # If gsd type is character, decode it here - if gsd_type_mapping[chunk.type][0] == "str": + if gsd_type_mapping[chunk.type][0] == 'str': data_npy = data_raw.decode('utf-8') else: data_npy = numpy.frombuffer(data_raw, dtype=gsd_type_mapping[chunk.type][1]) From 6f564687d0e308746a990efa2a237d349df68fc9 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 09:54:24 -0400 Subject: [PATCH 17/42] Silently upgrade minor file version on write --- gsd/gsd.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index 5a4bbac0..ef2db77e 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -86,7 +86,12 @@ enum /// Current GSD file specification enum { - GSD_CURRENT_FILE_VERSION = 2 + GSD_CURRENT_FILE_VERSION_MAJOR = 2 + }; + +enum + { + GSD_CURRENT_FILE_VERSION_MINOR = 1 }; // define windows wrapper functions @@ -1384,7 +1389,8 @@ gsd_initialize_file(int fd, const char* application, const char* schema, uint32_ gsd_util_zero_memory(&header, sizeof(header)); header.magic = GSD_MAGIC_ID; - header.gsd_version = 
gsd_make_version(GSD_CURRENT_FILE_VERSION, 0); + header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, + GSD_CURRENT_FILE_VERSION_MINOR); strncpy(header.application, application, sizeof(header.application) - 1); header.application[sizeof(header.application) - 1] = 0; strncpy(header.schema, schema, sizeof(header.schema) - 1); @@ -1607,6 +1613,21 @@ inline static int gsd_initialize_handle(struct gsd_handle* handle) handle->maximum_write_buffer_size = GSD_DEFAULT_MAXIMUM_WRITE_BUFFER_SIZE; handle->index_entries_to_buffer = GSD_DEFAULT_INDEX_ENTRIES_TO_BUFFER; + // When opening a file in a writeable mode, if the file's major version + // is identical to the current major version, silently upgrade the minor version + if((handle->open_flags == GSD_OPEN_READWRITE || handle-> open_flags == GSD_OPEN_APPEND) && + (handle->header.gsd_version != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) && + (handle->header.gsd_version >> 16 == GSD_CURRENT_FILE_VERSION_MAJOR)) { + + handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); + size_t bytes_written = gsd_io_pwrite_retry(handle->fd, &(handle->header), sizeof(struct gsd_header), 0); + + if (bytes_written != sizeof(struct gsd_header)) + { + return GSD_ERROR_IO; + } + } + return GSD_SUCCESS; } @@ -2558,8 +2579,9 @@ int gsd_upgrade(struct gsd_handle* handle) } } - // label the file as a v2.0 file - handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION, 0); + // label the file as a v2.1 file + handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, + GSD_CURRENT_FILE_VERSION_MINOR); // write the new header out ssize_t bytes_written From 75595dec13bceff09790b03d516981ef33cba1da Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 09:56:07 -0400 Subject: [PATCH 18/42] Fix formatting with pre-commit --- gsd/gsd.c | 27 +++++++++++++++------------ 1 file changed, 15 
insertions(+), 12 deletions(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index ef2db77e..bd9de68b 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -1389,8 +1389,8 @@ gsd_initialize_file(int fd, const char* application, const char* schema, uint32_ gsd_util_zero_memory(&header, sizeof(header)); header.magic = GSD_MAGIC_ID; - header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, - GSD_CURRENT_FILE_VERSION_MINOR); + header.gsd_version + = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); strncpy(header.application, application, sizeof(header.application) - 1); header.application[sizeof(header.application) - 1] = 0; strncpy(header.schema, schema, sizeof(header.schema) - 1); @@ -1615,18 +1615,21 @@ inline static int gsd_initialize_handle(struct gsd_handle* handle) // When opening a file in a writeable mode, if the file's major version // is identical to the current major version, silently upgrade the minor version - if((handle->open_flags == GSD_OPEN_READWRITE || handle-> open_flags == GSD_OPEN_APPEND) && - (handle->header.gsd_version != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) && - (handle->header.gsd_version >> 16 == GSD_CURRENT_FILE_VERSION_MAJOR)) { - - handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); - size_t bytes_written = gsd_io_pwrite_retry(handle->fd, &(handle->header), sizeof(struct gsd_header), 0); - + if ((handle->open_flags == GSD_OPEN_READWRITE || handle->open_flags == GSD_OPEN_APPEND) + && (handle->header.gsd_version + != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) + && (handle->header.gsd_version >> 16 == GSD_CURRENT_FILE_VERSION_MAJOR)) + { + handle->header.gsd_version + = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); + size_t bytes_written + = gsd_io_pwrite_retry(handle->fd, &(handle->header), sizeof(struct gsd_header), 0); + if (bytes_written 
!= sizeof(struct gsd_header)) { return GSD_ERROR_IO; } - } + } return GSD_SUCCESS; } @@ -2580,8 +2583,8 @@ int gsd_upgrade(struct gsd_handle* handle) } // label the file as a v2.1 file - handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, - GSD_CURRENT_FILE_VERSION_MINOR); + handle->header.gsd_version + = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); // write the new header out ssize_t bytes_written From 26cdc20ece9339ae66a31ed2d7041507ea0b2f2e Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 09:56:27 -0400 Subject: [PATCH 19/42] Abstract hard-coded current version into separate variable --- gsd/test/test_fl.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/gsd/test/test_fl.py b/gsd/test/test_fl.py index 7ea6d069..fe92c853 100644 --- a/gsd/test/test_fl.py +++ b/gsd/test/test_fl.py @@ -17,6 +17,7 @@ import gsd.pygsd test_path = pathlib.Path(os.path.realpath(__file__)).parent +current_gsd_version = (2, 1) def test_create(tmp_path, open_mode): @@ -172,7 +173,7 @@ def test_metadata(tmp_path, open_mode): assert f.schema == 'none' assert f.schema_version == (1, 2) assert f.nframes == 150 - assert f.gsd_version == (2, 0) + assert f.gsd_version == current_gsd_version # test again with pygsd with gsd.pygsd.GSDFile( @@ -184,7 +185,7 @@ def test_metadata(tmp_path, open_mode): assert f.schema == 'none' assert f.schema_version == (1, 2) assert f.nframes == 150 - assert f.gsd_version == (2, 0) + assert f.gsd_version == current_gsd_version def test_append(tmp_path, open_mode): @@ -815,14 +816,14 @@ def check_v1_file_read(f): schema='none', schema_version=[1, 2], ) as f: - assert f.gsd_version == (2, 0) + assert f.gsd_version == current_gsd_version check_v1_file_read(f) with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_gsd_v1.gsd'), mode='rb') ) as f: - assert f.gsd_version == (2, 0) + assert f.gsd_version == current_gsd_version check_v1_file_read(f) @@ -955,7 
+956,7 @@ def check_v1_file_read(f): f.upgrade() - assert f.gsd_version == (2, 0) + assert f.gsd_version == current_gsd_version for value in values: if isinstance(value, int): @@ -975,7 +976,7 @@ def check_v1_file_read(f): schema='none', schema_version=[1, 2], ) as f: - assert f.gsd_version == (2, 0) + assert f.gsd_version == current_gsd_version check_v1_file_read(f) @@ -983,7 +984,7 @@ def check_v1_file_read(f): with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_gsd_v1.gsd'), mode='rb') ) as f: - assert f.gsd_version == (2, 0) + assert f.gsd_version == current_gsd_version check_v1_file_read(f) From fa57eaedbccb4a1c039d7c3bead726fef454195a Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 10:00:40 -0400 Subject: [PATCH 20/42] Update changelog --- CHANGELOG.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4d0e30b4..70523a6e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,14 @@ Change Log 3.x --- +3.4.0 (not yet released) +^^^^^^^^^^^^^^^^^^^^^^^^ + +*Added:* + +* New chunk type for string data - valid in file layer versions 2.1 and later + (`#391 `__). + 3.3.2 (2024-09-06) ^^^^^^^^^^^^^^^^^^ From 138ce40571918cab2cd8e9170d3a06e2d6215bd6 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 10:00:47 -0400 Subject: [PATCH 21/42] Update credits --- doc/credits.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/credits.rst b/doc/credits.rst index 2d7c5237..298b157f 100644 --- a/doc/credits.rst +++ b/doc/credits.rst @@ -18,3 +18,4 @@ The following people contributed to GSD. 
* Alexander Stukowski, OVITO GmbH * Charlotte Shiqi Zhao, University of Michigan * Tim Moore, University of Michigan +* Joseph Burkhart, University of Michigan \ No newline at end of file From 28551fc35c9f8b2c60f44e33f6eabf8185ce9259 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 10:01:32 -0400 Subject: [PATCH 22/42] Fix formatting with pre-commit --- doc/credits.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/credits.rst b/doc/credits.rst index 298b157f..18f9f19a 100644 --- a/doc/credits.rst +++ b/doc/credits.rst @@ -18,4 +18,4 @@ The following people contributed to GSD. * Alexander Stukowski, OVITO GmbH * Charlotte Shiqi Zhao, University of Michigan * Tim Moore, University of Michigan -* Joseph Burkhart, University of Michigan \ No newline at end of file +* Joseph Burkhart, University of Michigan From 34229a1400aed19a01e7d83ecde5712f55c23770 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Fri, 20 Sep 2024 10:03:30 -0400 Subject: [PATCH 23/42] Fix clang tidy error --- gsd/gsd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index bd9de68b..bd13bc2e 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -1618,7 +1618,7 @@ inline static int gsd_initialize_handle(struct gsd_handle* handle) if ((handle->open_flags == GSD_OPEN_READWRITE || handle->open_flags == GSD_OPEN_APPEND) && (handle->header.gsd_version != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) - && (handle->header.gsd_version >> 16 == GSD_CURRENT_FILE_VERSION_MAJOR)) + && (handle->header.gsd_version >> (sizeof(uint32_t) * 4) == GSD_CURRENT_FILE_VERSION_MAJOR)) { handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); From 0a64636c4b9722c222b6a3f52d1157e281761fd0 Mon Sep 17 00:00:00 2001 From: Joseph Burkhart <61951318+josephburkhart@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:44:59 -0400 Subject: [PATCH 24/42] Remove commented 
code Co-authored-by: Joshua A. Anderson --- gsd/fl.pyx | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/gsd/fl.pyx b/gsd/fl.pyx index cb9bbc16..96c19435 100644 --- a/gsd/fl.pyx +++ b/gsd/fl.pyx @@ -146,21 +146,6 @@ cdef void * __get_ptr_float64(data): else: return &data_array_float64[0, 0] -cdef void * __get_ptr_char(data): - # cdef numpy.ndarray[numpy.str_, ndim=2, mode="c", cast=True] data_array_char - cdef size_t address - address = data.__array_interface__["data"][0] - # data_array_char: cython.char[::1] = data.reshape(data.size) - # data_array_char = data - # cdef char *data_array_char - # data_array_char = cython.address(data.data[0]) - if (data.size == 0): - return NULL - else: - # return cython.address(data_array_char.data[0]) - # return cython.address(data_array_char[0]) - return address - def open(name, mode, application=None, schema=None, schema_version=None): """open(name, mode, application=None, schema=None, schema_version=None) From 10e96ea734bade2397787c3512dd6e1ef84dd838 Mon Sep 17 00:00:00 2001 From: Joseph Burkhart <61951318+josephburkhart@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:45:13 -0400 Subject: [PATCH 25/42] Remove commented code Co-authored-by: Joshua A. 
Anderson --- gsd/fl.pyx | 6 ------ 1 file changed, 6 deletions(-) diff --git a/gsd/fl.pyx b/gsd/fl.pyx index 96c19435..853cb0d0 100644 --- a/gsd/fl.pyx +++ b/gsd/fl.pyx @@ -625,12 +625,6 @@ cdef class GSDFile: elif data_array.dtype == numpy.float64: gsd_type = libgsd.GSD_TYPE_DOUBLE data_ptr = __get_ptr_float64(data_array) - # elif data_array.dtype.type is numpy.str_: - # if N != 0: - # M = int(data_array.dtype.itemsize/4) - # print(f"Wrote: {N=}, {M=}, {data_array=}") - # gsd_type = libgsd.GSD_TYPE_CHARACTER - # data_ptr = __get_ptr_char(data_array) else: raise ValueError("invalid type for chunk: " + name) From 899e0e21f772122b433234bcc54898662b112204 Mon Sep 17 00:00:00 2001 From: Joseph Burkhart <61951318+josephburkhart@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:45:35 -0400 Subject: [PATCH 26/42] Remove unused import Co-authored-by: Joshua A. Anderson --- gsd/fl.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/gsd/fl.pyx b/gsd/fl.pyx index 853cb0d0..e8f42ec5 100644 --- a/gsd/fl.pyx +++ b/gsd/fl.pyx @@ -22,7 +22,6 @@ from libc.stdint cimport uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t,\ from libc.errno cimport errno cimport gsd.libgsd as libgsd cimport numpy -cimport cython logger = logging.getLogger('gsd.fl') From 983f639aade0169e1834ed7f02771fdb0d054647 Mon Sep 17 00:00:00 2001 From: Joseph Burkhart <61951318+josephburkhart@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:46:07 -0400 Subject: [PATCH 27/42] Remove print statement Co-authored-by: Joshua A. 
Anderson --- gsd/fl.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/gsd/fl.pyx b/gsd/fl.pyx index e8f42ec5..bc65046d 100644 --- a/gsd/fl.pyx +++ b/gsd/fl.pyx @@ -853,7 +853,6 @@ cdef class GSDFile: if gsd_type == libgsd.GSD_TYPE_CHARACTER: data_array = data_array.flatten() bytes_array = data_array.view(dtype=numpy.dtype((bytes, data_array.shape[0]))) - print(bytes_array[0].decode("UTF-8")) return bytes_array[0].decode("UTF-8") return data_array.reshape([index_entry.N]) From 900f43faab210b1275a8cbc244b4a3e79a1bbf93 Mon Sep 17 00:00:00 2001 From: Joseph Burkhart <61951318+josephburkhart@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:47:12 -0400 Subject: [PATCH 28/42] Fix header overwriting when file version reflects current version Co-authored-by: Joshua A. Anderson --- gsd/gsd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index bd13bc2e..5d2c77b7 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -1618,7 +1618,8 @@ inline static int gsd_initialize_handle(struct gsd_handle* handle) if ((handle->open_flags == GSD_OPEN_READWRITE || handle->open_flags == GSD_OPEN_APPEND) && (handle->header.gsd_version != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) - && (handle->header.gsd_version >> (sizeof(uint32_t) * 4) == GSD_CURRENT_FILE_VERSION_MAJOR)) + && (handle->header.gsd_version >> (sizeof(uint32_t) * 4) == GSD_CURRENT_FILE_VERSION_MAJOR) + && (handle->header.gsd_version & 0xFFFF < GSD_CURRENT_FILE_VERSION_MINOR)) { handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); From 74731b00a4234b0dd22ae00f1a56cb037a27d96c Mon Sep 17 00:00:00 2001 From: Joseph Burkhart <61951318+josephburkhart@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:47:41 -0400 Subject: [PATCH 29/42] Revise comment for clarity Co-authored-by: Joshua A. 
Anderson --- gsd/gsd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index 5d2c77b7..c26e4138 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -2583,7 +2583,7 @@ int gsd_upgrade(struct gsd_handle* handle) } } - // label the file as a v2.1 file + // GSD always writes files matching the current major and minor version. handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); From f84a50767de840a0710fce1e419d9fdffbf7744b Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Tue, 24 Sep 2024 12:58:07 -0400 Subject: [PATCH 30/42] Fix clang tidy error --- gsd/gsd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index c26e4138..d0e412e5 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -1619,7 +1619,7 @@ inline static int gsd_initialize_handle(struct gsd_handle* handle) && (handle->header.gsd_version != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) && (handle->header.gsd_version >> (sizeof(uint32_t) * 4) == GSD_CURRENT_FILE_VERSION_MAJOR) - && (handle->header.gsd_version & 0xFFFF < GSD_CURRENT_FILE_VERSION_MINOR)) + && (handle->header.gsd_version & (0xFFFF < GSD_CURRENT_FILE_VERSION_MINOR))) { handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); From 377c2debcef43a463f2e96f548b708598786f081 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Tue, 24 Sep 2024 13:09:29 -0400 Subject: [PATCH 31/42] Fix clang tidy magic number error --- gsd/gsd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index d0e412e5..2f6731b7 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -1615,11 +1615,12 @@ inline static int gsd_initialize_handle(struct gsd_handle* handle) // When opening a file in a writeable mode, if the file's major version // is identical to the current major version, silently upgrade the minor version + const unsigned int 
GSD_MAX_FILE_VERSION_MINOR = 0xFFFF; if ((handle->open_flags == GSD_OPEN_READWRITE || handle->open_flags == GSD_OPEN_APPEND) && (handle->header.gsd_version != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) && (handle->header.gsd_version >> (sizeof(uint32_t) * 4) == GSD_CURRENT_FILE_VERSION_MAJOR) - && (handle->header.gsd_version & (0xFFFF < GSD_CURRENT_FILE_VERSION_MINOR))) + && (handle->header.gsd_version & (GSD_MAX_FILE_VERSION_MINOR < GSD_CURRENT_FILE_VERSION_MINOR))) { handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); From a01f29b5beb1eadde9e4a41ed55659ce0bbc41a2 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Tue, 24 Sep 2024 13:10:46 -0400 Subject: [PATCH 32/42] Fix formatting error with pre-commit --- gsd/gsd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index 2f6731b7..af220719 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -1620,7 +1620,8 @@ inline static int gsd_initialize_handle(struct gsd_handle* handle) && (handle->header.gsd_version != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) && (handle->header.gsd_version >> (sizeof(uint32_t) * 4) == GSD_CURRENT_FILE_VERSION_MAJOR) - && (handle->header.gsd_version & (GSD_MAX_FILE_VERSION_MINOR < GSD_CURRENT_FILE_VERSION_MINOR))) + && (handle->header.gsd_version + & (GSD_MAX_FILE_VERSION_MINOR < GSD_CURRENT_FILE_VERSION_MINOR))) { handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); From ffe16521a86540153c97f4d0a14746c9f7a2bef0 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 25 Sep 2024 10:12:40 -0400 Subject: [PATCH 33/42] Remove unneeded check. 
--- gsd/gsd.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/gsd/gsd.c b/gsd/gsd.c index af220719..07997408 100644 --- a/gsd/gsd.c +++ b/gsd/gsd.c @@ -1613,15 +1613,12 @@ inline static int gsd_initialize_handle(struct gsd_handle* handle) handle->maximum_write_buffer_size = GSD_DEFAULT_MAXIMUM_WRITE_BUFFER_SIZE; handle->index_entries_to_buffer = GSD_DEFAULT_INDEX_ENTRIES_TO_BUFFER; - // When opening a file in a writeable mode, if the file's major version - // is identical to the current major version, silently upgrade the minor version - const unsigned int GSD_MAX_FILE_VERSION_MINOR = 0xFFFF; + // Silently upgrade writable files from a previous matching major version to the latest + // minor version. if ((handle->open_flags == GSD_OPEN_READWRITE || handle->open_flags == GSD_OPEN_APPEND) && (handle->header.gsd_version != gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR)) - && (handle->header.gsd_version >> (sizeof(uint32_t) * 4) == GSD_CURRENT_FILE_VERSION_MAJOR) - && (handle->header.gsd_version - & (GSD_MAX_FILE_VERSION_MINOR < GSD_CURRENT_FILE_VERSION_MINOR))) + && (handle->header.gsd_version >> (sizeof(uint32_t) * 4) == GSD_CURRENT_FILE_VERSION_MAJOR)) { handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION_MAJOR, GSD_CURRENT_FILE_VERSION_MINOR); From 6f37b8ffefaa8cead6b7ee5433a4ef8348f0fb79 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 25 Sep 2024 10:26:32 -0400 Subject: [PATCH 34/42] Reword GSD_CHARACTER definition in docs. --- doc/file-layer.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/file-layer.rst b/doc/file-layer.rst index 96c1b50d..8bec0f8a 100644 --- a/doc/file-layer.rst +++ b/doc/file-layer.rst @@ -6,7 +6,7 @@ File layer .. highlight:: c -**Version: 2.0** +**Version: 2.x** General simulation data (GSD) **file layer** design and rationale. These use cases and design specifications define the low level GSD file format. 
@@ -236,5 +236,8 @@ A data block stores raw data bytes on the disk. For a given index entry ``entry``, the data starts at location ``entry.location`` and is the next ``entry.N * entry.M * gsd_sizeof_type(entry.type)`` bytes. -The type fields are self-explanatory except for UTF-8 strings, for which -null-termination is allowed but not required. +Added in version 2.1 +^^^^^^^^^^^^^^^^^^^^ + +* The ``GSD_CHARACTER`` chunk type represents a UTF-8 string (null termination is allowed, but not + required). From 1ec21b9c9c5fb70c5b7d2bb452f5aa15e80c4cc3 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 25 Sep 2024 10:55:23 -0400 Subject: [PATCH 35/42] Further revisions to the file layer spec. --- doc/file-layer.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/file-layer.rst b/doc/file-layer.rst index 8bec0f8a..a79fb1dc 100644 --- a/doc/file-layer.rst +++ b/doc/file-layer.rst @@ -128,7 +128,7 @@ There are four types of data blocks in a GSD file. * List of string names used by index entries. * v1.0 files: Each name is a 64-byte character string. - * v2.0 files: Names may have any length and are separated by 0 terminators. + * v2.x files: Names may have any length and are separated by 0 terminators. * The first name that starts with the 0 byte marks the end of the list * The header stores the total size of the name list block. @@ -215,13 +215,13 @@ non-standard packing attributes or pragmas to enforce this. In v1.0 files, the frame index must monotonically increase from one index entry to the next. The GSD API ensures this. -In v2.0 files, the entire index block is stored sorted first by frame, then +In v2.x files, the entire index block is stored sorted first by frame, then by *id*. Namelist block ^^^^^^^^^^^^^^ -In v2.0 files, the namelist block stores a list of strings separated by 0 +In v2.x files, the namelist block stores a list of strings separated by 0 terminators. 
In v1.0 files, the namelist block stores a list of 0-terminated strings in @@ -237,7 +237,7 @@ A data block stores raw data bytes on the disk. For a given index entry ``entry.N * entry.M * gsd_sizeof_type(entry.type)`` bytes. Added in version 2.1 -^^^^^^^^^^^^^^^^^^^^ +-------------------- * The ``GSD_CHARACTER`` chunk type represents a UTF-8 string (null termination is allowed, but not required). From 3e2ae056d1ba939bbca42c1c996c1b703d02f869 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 25 Sep 2024 11:41:29 -0400 Subject: [PATCH 36/42] Update string writing example. --- doc/fl-examples.rst | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/doc/fl-examples.rst b/doc/fl-examples.rst index d66a47e3..e61b84c3 100644 --- a/doc/fl-examples.rst +++ b/doc/fl-examples.rst @@ -198,21 +198,15 @@ Store string chunks application="My application", schema="My Schema", schema_version=[1,0]) - f.mode - s = "This is a string" - b = numpy.array([s], dtype=numpy.dtype((bytes, len(s)+1))) - b = b.view(dtype=numpy.int8) - b - f.write_chunk(name='string', data=b) + f.write_chunk(name='string', data="This is a string") f.end_frame() r = f.read_chunk(frame=0, name='string') r - r = r.view(dtype=numpy.dtype((bytes, r.shape[0]))); - r[0].decode('UTF-8') f.close() -To store a string in a gsd file, convert it to a numpy array of bytes and store -that data in the file. Decode the byte sequence to get back a string. +Starting with GSD 3.4.0, the file layer can natively store strings in the file. +In previous versions, you need to convert strings to a numpy array of bytes and store +that data in the file. 
Truncate ^^^^^^^^ From e1bb00317178df2f29263de7ae8bd41ca2670834 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Thu, 26 Sep 2024 10:10:34 -0400 Subject: [PATCH 37/42] Tweak log reading to correctly duck-type strings --- gsd/hoomd.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gsd/hoomd.py b/gsd/hoomd.py index 7af28a29..a4093339 100644 --- a/gsd/hoomd.py +++ b/gsd/hoomd.py @@ -1195,6 +1195,9 @@ def read_log(name, scalar_only=False): tmp = numpy.array([0], dtype=numpy.uint64) else: tmp = gsdfileobj.read_chunk(frame=0, name=log) + # if chunk contains string, put it in the numpy array + if isinstance(tmp, str): + tmp = numpy.array([tmp]) if scalar_only and not tmp.shape[0] == 1: continue From 3c5ef91cbb35f584e1ddafdccb1c9e1b937febe1 Mon Sep 17 00:00:00 2001 From: josephburkhart Date: Thu, 26 Sep 2024 10:10:51 -0400 Subject: [PATCH 38/42] Add string data type to log tests --- gsd/test/test_hoomd.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/gsd/test/test_hoomd.py b/gsd/test/test_hoomd.py index 169180bb..eb4ca65c 100644 --- a/gsd/test/test_hoomd.py +++ b/gsd/test/test_hoomd.py @@ -794,11 +794,13 @@ def test_log(tmp_path, open_mode): frame0.log['particles/pair_lj_energy'] = [0, -5, -8, -3] frame0.log['value/potential_energy'] = [10] frame0.log['value/pressure'] = [-3] + frame0.log['category'] = 'A' frame1 = gsd.hoomd.Frame() frame1.log['particles/pair_lj_energy'] = [1, 2, -4, -10] frame1.log['value/pressure'] = [5] + frame1.log['category'] = 'B' with gsd.hoomd.open(name=tmp_path / 'test_log.gsd', mode=open_mode.write) as hf: hf.extend([frame0, frame1]) @@ -819,6 +821,7 @@ def test_log(tmp_path, open_mode): numpy.testing.assert_array_equal( s.log['value/pressure'], frame0.log['value/pressure'] ) + assert s.log['category'] == frame0.log['category'] s = hf[1] @@ -837,6 +840,7 @@ def test_log(tmp_path, open_mode): numpy.testing.assert_array_equal( s.log['value/pressure'], frame1.log['value/pressure'] ) + assert 
s.log['category'] == frame1.log['category'] def test_pickle(tmp_path): @@ -877,6 +881,7 @@ def test_read_log(tmp_path): ] frame0.log['value/potential_energy'] = [10] frame0.log['value/pressure'] = [-3] + frame0.log['category'] = 'A' frame1 = gsd.hoomd.Frame() frame1.configuration.step = 1 @@ -888,6 +893,7 @@ def test_read_log(tmp_path): (4, 4, 4), ] frame1.log['value/pressure'] = [5] + frame1.log['category'] = 'B' with gsd.hoomd.open(name=tmp_path / 'test_log.gsd', mode='w') as hf: hf.extend([frame0, frame1]) @@ -897,13 +903,14 @@ def test_read_log(tmp_path): name=tmp_path / 'test_log.gsd', scalar_only=False ) - assert len(logged_data_dict) == 5 + assert len(logged_data_dict) == 6 assert list(logged_data_dict.keys()) == [ 'configuration/step', 'log/particles/pair_lj_energy', 'log/particles/pair_lj_force', 'log/value/potential_energy', 'log/value/pressure', + 'log/category', ] numpy.testing.assert_array_equal(logged_data_dict['configuration/step'], [0, 1]) @@ -926,16 +933,21 @@ def test_read_log(tmp_path): logged_data_dict['log/value/pressure'], [*frame0.log['value/pressure'], *frame1.log['value/pressure']], ) + numpy.testing.assert_array_equal( + logged_data_dict['log/category'], + [*frame0.log['category'], *frame1.log['category']], + ) # Test scalar_only = True logged_data_dict = gsd.hoomd.read_log( name=tmp_path / 'test_log.gsd', scalar_only=True ) - assert len(logged_data_dict) == 3 + assert len(logged_data_dict) == 4 assert list(logged_data_dict.keys()) == [ 'configuration/step', 'log/value/potential_energy', 'log/value/pressure', + 'log/category', ] numpy.testing.assert_array_equal(logged_data_dict['configuration/step'], [0, 1]) numpy.testing.assert_array_equal( @@ -946,6 +958,10 @@ def test_read_log(tmp_path): logged_data_dict['log/value/pressure'], [*frame0.log['value/pressure'], *frame1.log['value/pressure']], ) + numpy.testing.assert_array_equal( + logged_data_dict['log/category'], + [*frame0.log['category'], *frame1.log['category']], + ) def 
test_read_log_warning(tmp_path): From eca3c6a40889bbf66151fd65d495f9a5748c3fef Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Fri, 18 Oct 2024 09:07:59 -0400 Subject: [PATCH 39/42] Use StringDType in read_log for string chunks. --- gsd/hoomd.py | 13 ++++++++++--- gsd/test/test_hoomd.py | 14 ++++++++++---- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/gsd/hoomd.py b/gsd/hoomd.py index a4093339..3934a8f3 100644 --- a/gsd/hoomd.py +++ b/gsd/hoomd.py @@ -1197,13 +1197,15 @@ def read_log(name, scalar_only=False): tmp = gsdfileobj.read_chunk(frame=0, name=log) # if chunk contains string, put it in the numpy array if isinstance(tmp, str): - tmp = numpy.array([tmp]) + tmp = numpy.array([tmp], dtype=numpy.dtypes.StringDType) if scalar_only and not tmp.shape[0] == 1: continue if tmp.shape[0] == 1: logged_data_dict[log] = numpy.full( - fill_value=tmp[0], shape=(gsdfileobj.nframes,) + fill_value=tmp[0], + shape=(gsdfileobj.nframes,), + dtype=tmp.dtype, ) else: logged_data_dict[log] = numpy.tile( @@ -1215,7 +1217,12 @@ def read_log(name, scalar_only=False): if not gsdfileobj.chunk_exists(frame=idx, name=key): continue data = gsdfileobj.read_chunk(frame=idx, name=key) - if len(logged_data_dict[key][idx].shape) == 0: + if ( + not isinstance( + logged_data_dict[key].dtype, numpy.dtypes.StringDType + ) + and len(logged_data_dict[key][idx].shape) == 0 + ): logged_data_dict[key][idx] = data[0] else: logged_data_dict[key][idx] = data diff --git a/gsd/test/test_hoomd.py b/gsd/test/test_hoomd.py index eb4ca65c..3b813c17 100644 --- a/gsd/test/test_hoomd.py +++ b/gsd/test/test_hoomd.py @@ -800,7 +800,7 @@ def test_log(tmp_path, open_mode): frame1.log['particles/pair_lj_energy'] = [1, 2, -4, -10] frame1.log['value/pressure'] = [5] - frame1.log['category'] = 'B' + frame1.log['category'] = 'BBB' with gsd.hoomd.open(name=tmp_path / 'test_log.gsd', mode=open_mode.write) as hf: hf.extend([frame0, frame1]) @@ -893,7 +893,7 @@ def test_read_log(tmp_path): (4, 4, 4), ] 
frame1.log['value/pressure'] = [5] - frame1.log['category'] = 'B' + frame1.log['category'] = 'BBB' with gsd.hoomd.open(name=tmp_path / 'test_log.gsd', mode='w') as hf: hf.extend([frame0, frame1]) @@ -935,7 +935,10 @@ def test_read_log(tmp_path): ) numpy.testing.assert_array_equal( logged_data_dict['log/category'], - [*frame0.log['category'], *frame1.log['category']], + numpy.array( + [frame0.log['category'], frame1.log['category']], + dtype=numpy.dtypes.StringDType, + ), ) # Test scalar_only = True @@ -960,7 +963,10 @@ def test_read_log(tmp_path): ) numpy.testing.assert_array_equal( logged_data_dict['log/category'], - [*frame0.log['category'], *frame1.log['category']], + numpy.array( + [frame0.log['category'], frame1.log['category']], + dtype=numpy.dtypes.StringDType, + ), ) From 50c8b0b7dcd0469bf92ae4183d53a79b97612f76 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Fri, 18 Oct 2024 09:25:28 -0400 Subject: [PATCH 40/42] Require numpy 2.0. --- .github/workflows/build_wheels.yaml | 8 ++++---- INSTALLING.rst | 2 +- pyproject.toml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 4fbe2ddb..b847e5ca 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -38,11 +38,11 @@ jobs: python: - version: 'cp310' - oldest_numpy: '1.21.6' + oldest_numpy: '2.0.0' - version: 'cp311' - oldest_numpy: '1.23.2' + oldest_numpy: '2.0.0' - version: 'cp312' - oldest_numpy: '1.26.2' + oldest_numpy: '2.0.0' - version: 'cp313' oldest_numpy: '2.1.1' @@ -53,7 +53,7 @@ jobs: uses: pypa/cibuildwheel@d4a2945fcc8d13f20a1b99d461b8e844d5fc6e23 # v2.21.1 env: CIBW_BUILD: "${{ matrix.python.version }}-*" - CIBW_TEST_REQUIRES: pytest==8.2.1 numpy==${{ matrix.python.oldest_numpy }} + CIBW_TEST_REQUIRES: pytest==8.3.3 numpy==${{ matrix.python.oldest_numpy }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: diff --git 
a/INSTALLING.rst b/INSTALLING.rst index 712ac972..af2f9472 100644 --- a/INSTALLING.rst +++ b/INSTALLING.rst @@ -109,7 +109,7 @@ Install prerequisites * **C compiler** (tested with gcc 10-14, clang 10-18, Visual Studio 2019-2022) * **Python** >= 3.10 -* **numpy** >= 1.19.0 +* **numpy** >= 2.0.0 * **Cython** >= 0.22 **To execute unit tests:** diff --git a/pyproject.toml b/pyproject.toml index dfb43604..700919f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers=[ "License :: OSI Approved :: BSD License", "Topic :: Scientific/Engineering :: Physics", ] -dependencies = ["numpy>=1.19.0"] +dependencies = ["numpy>=2.0.0"] [project.scripts] gsd = "gsd.__main__:main" From 12e0ea63d23d21a465d2cf9ce56f141a30a58e07 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Fri, 18 Oct 2024 09:31:34 -0400 Subject: [PATCH 41/42] Test with Python 3.13 final. --- .github/workflows/unit_test.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit_test.yaml b/.github/workflows/unit_test.yaml index f0d0d43a..e9b8da77 100644 --- a/.github/workflows/unit_test.yaml +++ b/.github/workflows/unit_test.yaml @@ -30,7 +30,7 @@ jobs: - os: windows-2019 python: '3.12' - os: windows-2022 - python: '3.13.0-rc.1' + python: '3.13' ############## # Mac # macos-x86_64 @@ -38,7 +38,7 @@ jobs: python: '3.12' # macos-arm64 - os: macos-14 - python: '3.13.0-rc.1' + python: '3.13' ############## # Ubuntu 24.04 - os: ubuntu-24.04 @@ -58,7 +58,7 @@ jobs: c_compiler: clang-18 cxx_compiler: clang++-18 - os: ubuntu-24.04 - python: '3.13.0-rc.1' + python: '3.13.0' c_compiler: clang-18 cxx_compiler: clang++-18 ############## From 59e421385ba4d9156a828252720d6a8f31e0dfa6 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Fri, 18 Oct 2024 09:32:59 -0400 Subject: [PATCH 42/42] Mention numpy 2.0 requirement in change log. 
--- CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 70523a6e..e0a0636e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -18,6 +18,11 @@ Change Log * New chunk type for string data - valid in file layer versions 2.1 and later (`#391 `__). +*Changed:* + +* Require NumPy >= 2.0 + (`#391 `__). + 3.3.2 (2024-09-06) ^^^^^^^^^^^^^^^^^^