From 0df592d194bc80c7a06e3b7a916cd00d43498af9 Mon Sep 17 00:00:00 2001
From: Hannes Vogt
Date: Wed, 15 Nov 2023 13:30:12 +0100
Subject: [PATCH] feat[next] high-level field storage API (#1319)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce user API to allocate fields in `gt4py.next`.

Summary of main changes:

- Introduce field buffer allocator protocols and implementations
- Introduce the concept of a Backend as ProgramExecutor & Allocator
- Replace `np_as_located_field` with `as_field`
- Make `NdArrayField` public
- Fixes for `_core.definitions` typings
- Fixes and extensions of `eve.extended_typing`
- Refactor the handling of backends/program processors in the testing
  infrastructure with string enumerations representing the qualified name
  of the Python symbol, which can be loaded on demand
- Rename some executor symbols and modules
- Minor style changes to imports and imported symbols to follow coding guidelines

Open "To Do"s for future PRs:

- Add support for the `GTFieldInterface` protocol in cartesian and use it
  instead of the `NextGTDimsInterface` protocol in next
- Add support for `aligned_index != None` in `FieldBufferAllocator` implementations
- Add support for zero-copy construction of fields in `constructors.as_field()`

---------

Co-authored-by: Enrique G. Paredes <18477+egparedes@users.noreply.github.com>
Co-authored-by: Enrique Gonzalez Paredes
Co-authored-by: Rico Häuselmann
Co-authored-by: nfarabullini
---
 .../0008-Mapping_Domain_to_Cpp-Backend.md | 5 +-
 docs/development/ADRs/Index.md | 1 +
 docs/user/next/QuickstartGuide.md | 50 ++-
 src/gt4py/_core/definitions.py | 66 ++--
 src/gt4py/eve/codegen.py | 2 +-
 src/gt4py/eve/extended_typing.py | 62 +++-
 src/gt4py/eve/utils.py | 56 ++-
 src/gt4py/next/__init__.py | 7 +
 src/gt4py/next/allocators.py | 349 ++++++++++++++++++
 src/gt4py/next/common.py | 63 +++-
 src/gt4py/next/constructors.py | 297 +++++++++++++++
 src/gt4py/next/embedded/nd_array_field.py | 42 +--
 src/gt4py/next/ffront/decorator.py | 18 +-
 src/gt4py/next/iterator/embedded.py | 7 +-
 .../program_processors/formatters/gtfn.py | 2 +-
 .../formatters/pretty_print.py | 26 +-
 .../formatters/type_check.py | 2 +-
 .../otf_compile_executor.py | 31 +-
 .../program_processors/processor_interface.py | 163 ++++++--
 .../runners/dace_iterator/__init__.py | 49 ++-
 .../runners/double_roundtrip.py | 24 +-
 .../next/program_processors/runners/gtfn.py | 64 +++-
 .../program_processors/runners/roundtrip.py | 45 ++-
 src/gt4py/storage/allocators.py | 231 ++++++------
 src/gt4py/storage/cartesian/interface.py | 17 +-
 src/gt4py/storage/cartesian/utils.py | 43 ++-
 .../unit_tests/test_extended_typing.py | 63 ++++
 tests/eve_tests/unit_tests/test_utils.py | 15 +
 tests/next_tests/__init__.py | 4 +
 tests/next_tests/exclusion_matrices.py | 78 +++-
 tests/next_tests/integration_tests/cases.py | 23 +-
 .../ffront_tests/ffront_test_utils.py | 37 +-
 .../ffront_tests/test_execution.py | 12 +-
 .../ffront_tests/test_external_local_field.py | 4 +-
 .../ffront_tests/test_gpu_backend.py | 18 +-
 .../ffront_tests/test_gt4py_builtins.py | 2 +-
 .../test_math_builtin_execution.py | 6 +-
 .../ffront_tests/test_math_unary_builtins.py | 2 +-
 .../ffront_tests/test_program.py | 2 +-
 .../ffront_tests/test_scalar_if.py | 3 +-
 .../iterator_tests/test_builtins.py | 41 +-
 .../test_cartesian_offset_provider.py | 4 +-
 .../iterator_tests/test_conditional.py | 4 +-
 .../iterator_tests/test_constant.py | 4 +-
 .../test_horizontal_indirection.py | 18 +-
 .../iterator_tests/test_implicit_fencil.py | 4 +-
.../feature_tests/iterator_tests/test_scan.py | 7 +- .../test_strided_offset_provider.py | 7 +- .../iterator_tests/test_trivial.py | 16 +- .../iterator_tests/test_tuple.py | 99 ++--- .../feature_tests/test_util_cases.py | 8 +- .../ffront_tests/test_icon_like_scan.py | 24 +- .../iterator_tests/test_anton_toy.py | 6 +- .../iterator_tests/test_column_stencil.py | 52 +-- .../iterator_tests/test_fvm_nabla.py | 50 +-- .../iterator_tests/test_hdiff.py | 6 +- .../iterator_tests/test_vertical_advection.py | 4 +- .../test_with_toy_connectivity.py | 56 +-- .../otf_tests/test_gtfn_workflow.py | 6 +- tests/next_tests/unit_tests/conftest.py | 64 ++-- .../embedded_tests/test_nd_array_field.py | 4 +- .../iterator_tests/test_runtime_domain.py | 7 +- .../gtfn_tests/test_gtfn_module.py | 2 +- .../test_processor_interface.py | 53 +++ .../next_tests/unit_tests/test_allocators.py | 193 ++++++++++ .../unit_tests/test_constructors.py | 175 +++++++++ 66 files changed, 2277 insertions(+), 628 deletions(-) create mode 100644 src/gt4py/next/allocators.py create mode 100644 src/gt4py/next/constructors.py create mode 100644 tests/next_tests/unit_tests/test_allocators.py create mode 100644 tests/next_tests/unit_tests/test_constructors.py diff --git a/docs/development/ADRs/0008-Mapping_Domain_to_Cpp-Backend.md b/docs/development/ADRs/0008-Mapping_Domain_to_Cpp-Backend.md index 23b75c6df5..a1ee8575d2 100644 --- a/docs/development/ADRs/0008-Mapping_Domain_to_Cpp-Backend.md +++ b/docs/development/ADRs/0008-Mapping_Domain_to_Cpp-Backend.md @@ -7,10 +7,13 @@ tags: [] - **Status**: valid - **Authors**: Hannes Vogt (@havogt) - **Created**: 2022-06-29 -- **Updated**: 2022-06-29 +- **Updated**: 2023-11-08 This document proposes a (temporary) solution for mapping domain dimensions to field dimensions. +> [!NOTE] +> This ADR was written before the integration of `gt4py.storage` into `gt4py.next`, so the example is using `np_as_located_field` (now deprecated) instead of `gtx.as_field.partial`. The idea conveyed by the example remains unchanged. + ## Context The Python embedded execution for Iterator IR keeps track of the current location type of an iterator (allows safety checks) while the C++ backend does not. diff --git a/docs/development/ADRs/Index.md b/docs/development/ADRs/Index.md index 09d2273ee9..24272d9cee 100644 --- a/docs/development/ADRs/Index.md +++ b/docs/development/ADRs/Index.md @@ -45,6 +45,7 @@ _None_ - [0006 - C++ Backend](0006-Cpp-Backend.md) - [0007 - Fencil Processors](0007-Fencil-Processors.md) - [0008 - Mapping Domain to Cpp Backend](0008-Mapping_Domain_to_Cpp-Backend.md) +- [0016 - Multiple Backends and Build Systems](0016-Multiple-Backends-and-Build-Systems.md) ### Python Integration diff --git a/docs/user/next/QuickstartGuide.md b/docs/user/next/QuickstartGuide.md index bf6466ade6..1ae1db4d92 100644 --- a/docs/user/next/QuickstartGuide.md +++ b/docs/user/next/QuickstartGuide.md @@ -51,7 +51,7 @@ from gt4py.next import float64, neighbor_sum, where #### Fields -Fields store data as a multi-dimensional array, and are defined over a set of named dimensions. The code snippet below defines two named dimensions, _cell_ and _K_, and creates the fields `a` and `b` over their cartesian product using the `np_as_located_field` helper function. The fields contain the values 2 for `a` and 3 for `b` for all entries. +Fields store data as a multi-dimensional array, and are defined over a set of named dimensions. 
The code snippet below defines two named dimensions, _Cell_ and _K_, and creates the fields `a` and `b` over their cartesian product using the `gtx.as_field` helper function. The fields contain the values 2 for `a` and 3 for `b` for all entries.

```{code-cell} ipython3
CellDim = gtx.Dimension("Cell")
@@ -63,8 +63,20 @@ grid_shape = (num_cells, num_layers)
 a_value = 2.0
 b_value = 3.0
-a = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=a_value, dtype=np.float64))
-b = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=b_value, dtype=np.float64))
+a = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=a_value, dtype=np.float64))
+b = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=b_value, dtype=np.float64))
+```
+
+Additional numpy-equivalent constructors are available, namely `ones`, `zeros`, `empty` and `full`. These require a domain, a dtype, and an allocator (e.g. a backend) to be specified.
+
+```{code-cell} ipython3
+from gt4py._core import definitions as core_defs
+array_of_ones_numpy = np.ones((grid_shape[0], grid_shape[1]))
+field_of_ones = gtx.constructors.ones(
+    domain={CellDim: range(grid_shape[0]), KDim: range(grid_shape[1])},
+    dtype=core_defs.dtype(np.float64),
+    allocator=gtx.program_processors.runners.roundtrip.backend
+)
```

_Note: The interface to construct fields is provisional only and will change soon._

@@ -87,7 +99,7 @@ def add(a: gtx.Field[[CellDim, KDim], float64],

You can call field operators from [programs](#Programs), other field operators, or directly. The code snippet below shows a direct call, in which case you have to supply two additional arguments: `out`, which is a field to write the return value to, and `offset_provider`, which is left empty for now. The result of the field operator is a field with all entries equal to 5, but for brevity, only the average and the standard deviation of the entries are printed:

```{code-cell} ipython3
-result = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
+result = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
 add(a, b, out=result, offset_provider={})

 print("{} + {} = {} ± {}".format(a_value, b_value, np.average(np.asarray(result)), np.std(np.asarray(result))))
@@ -113,7 +125,7 @@ def run_add(a : gtx.Field[[CellDim, KDim], float64],

You can execute the program by simply calling it:

```{code-cell} ipython3
-result = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape))
+result = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape))
 run_add(a, b, result, offset_provider={})

 print("{} + {} = {} ± {}".format(b_value, (a_value + b_value), np.average(np.asarray(result)), np.std(np.asarray(result))))
@@ -200,8 +212,8 @@ cell_to_edge_table = np.array([

Let's start by defining two fields: one over the cells and another one over the edges. The field over cells serves as input for subsequent calculations and is therefore filled with values, whereas the field over the edges stores the output of the calculations and is therefore left blank.
```{code-cell} ipython3 -cell_values = gtx.np_as_located_field(CellDim)(np.array([1.0, 1.0, 2.0, 3.0, 5.0, 8.0])) -edge_values = gtx.np_as_located_field(EdgeDim)(np.zeros((12,))) +cell_values = gtx.as_field([CellDim], np.array([1.0, 1.0, 2.0, 3.0, 5.0, 8.0])) +edge_values = gtx.as_field([EdgeDim], np.zeros((12,))) ``` | ![cell_values](connectivity_cell_field.svg) | @@ -295,8 +307,8 @@ This function takes 3 input arguments: In the case where the true and false branches are either fields or scalars, the resulting output will be a field including all dimensions from all inputs. For example: ```{code-cell} ipython3 -mask = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape, dtype=bool)) -result_where = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape)) +mask = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape, dtype=bool)) +result_where = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape)) b = 6.0 @gtx.field_operator @@ -313,8 +325,8 @@ print("where return: {}".format(np.asarray(result_where))) The `where` supports the return of tuples of fields. To perform promotion of dimensions and dtype of the output, all arguments are analyzed and promoted as in the above section. ```{code-cell} ipython3 -result_1 = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape)) -result_2 = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape)) +result_1 = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape)) +result_2 = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape)) @gtx.field_operator def _conditional_tuple(mask: gtx.Field[[CellDim, KDim], bool], a: gtx.Field[[CellDim, KDim], float64], b: float @@ -338,13 +350,13 @@ The `where` builtin also allows for nesting of tuples. In this scenario, it will and then combine results to match the return type: ```{code-cell} ipython3 -a = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=2.0, dtype=np.float64)) -b = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=3.0, dtype=np.float64)) -c = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=4.0, dtype=np.float64)) -d = gtx.np_as_located_field(CellDim, KDim)(np.full(shape=grid_shape, fill_value=5.0, dtype=np.float64)) +a = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=2.0, dtype=np.float64)) +b = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=3.0, dtype=np.float64)) +c = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=4.0, dtype=np.float64)) +d = gtx.as_field([CellDim, KDim], np.full(shape=grid_shape, fill_value=5.0, dtype=np.float64)) -result_1 = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape)) -result_2 = gtx.np_as_located_field(CellDim, KDim)(np.zeros(shape=grid_shape)) +result_1 = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape)) +result_2 = gtx.as_field([CellDim, KDim], np.zeros(shape=grid_shape)) @gtx.field_operator def _conditional_tuple_nested( @@ -402,7 +414,7 @@ edge_weights = np.array([ [0, -1, -1], # cell 5 ], dtype=np.float64) -edge_weight_field = gtx.np_as_located_field(CellDim, C2EDim)(edge_weights) +edge_weight_field = gtx.as_field([CellDim, C2EDim], edge_weights) ``` Now you have everything to implement the pseudo-laplacian. Its field operator requires the cell field and the edge weights as inputs, and outputs a cell field of the same shape as the input. 
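Since the new constructors are decorated with `with_fluid_partial`, `as_field` also supports partial binding of its dimension argument via `.partial()`; this is the `gtx.as_field.partial` spelling referenced in the ADR note above. A small sketch of the pattern (illustrative only; it re-declares the guide's dimensions so it is self-contained, and assumes the `roundtrip` backend as allocator):

```python
import numpy as np

from gt4py import next as gtx
from gt4py.next.program_processors.runners import roundtrip

CellDim = gtx.Dimension("Cell")
C2EDim = gtx.Dimension("C2E", kind=gtx.DimensionKind.LOCAL)

# Bind the dimension list once, then reuse the partially-applied constructor:
as_cell_c2e_field = gtx.as_field.partial([CellDim, C2EDim])
edge_weight_field = as_cell_c2e_field(np.zeros((6, 3), dtype=np.float64))

# The numpy-like constructors take a domain mapping and an allocator:
zero_cells = gtx.zeros({CellDim: 6}, allocator=roundtrip.backend)
```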
@@ -428,7 +440,7 @@ def run_pseudo_laplacian(cells : gtx.Field[[CellDim], float64], out : gtx.Field[[CellDim], float64]): pseudo_lap(cells, edge_weights, out=out) -result_pseudo_lap = gtx.np_as_located_field(CellDim)(np.zeros(shape=(6,))) +result_pseudo_lap = gtx.as_field([CellDim], np.zeros(shape=(6,))) run_pseudo_laplacian(cell_values, edge_weight_field, diff --git a/src/gt4py/_core/definitions.py b/src/gt4py/_core/definitions.py index 059ba6c24c..7b318bc2de 100644 --- a/src/gt4py/_core/definitions.py +++ b/src/gt4py/_core/definitions.py @@ -25,6 +25,7 @@ import numpy as np import numpy.typing as npt +import gt4py.eve as eve from gt4py.eve.extended_typing import ( TYPE_CHECKING, Any, @@ -71,33 +72,33 @@ float64 = np.float64 BoolScalar: TypeAlias = Union[bool_, bool] -BoolT = TypeVar("BoolT", bound=Union[bool_, bool]) +BoolT = TypeVar("BoolT", bound=BoolScalar) BOOL_TYPES: Final[Tuple[type, ...]] = cast(Tuple[type, ...], BoolScalar.__args__) # type: ignore[attr-defined] IntScalar: TypeAlias = Union[int8, int16, int32, int64, int] -IntT = TypeVar("IntT", bound=Union[int8, int16, int32, int64, int]) +IntT = TypeVar("IntT", bound=IntScalar) INT_TYPES: Final[Tuple[type, ...]] = cast(Tuple[type, ...], IntScalar.__args__) # type: ignore[attr-defined] UnsignedIntScalar: TypeAlias = Union[uint8, uint16, uint32, uint64] -UnsignedIntT = TypeVar("UnsignedIntT", bound=Union[uint8, uint16, uint32, uint64]) +UnsignedIntT = TypeVar("UnsignedIntT", bound=UnsignedIntScalar) UINT_TYPES: Final[Tuple[type, ...]] = cast(Tuple[type, ...], UnsignedIntScalar.__args__) # type: ignore[attr-defined] IntegralScalar: TypeAlias = Union[IntScalar, UnsignedIntScalar] -IntegralT = TypeVar("IntegralT", bound=Union[IntScalar, UnsignedIntScalar]) +IntegralT = TypeVar("IntegralT", bound=IntegralScalar) INTEGRAL_TYPES: Final[Tuple[type, ...]] = (*INT_TYPES, *UINT_TYPES) FloatingScalar: TypeAlias = Union[float32, float64, float] -FloatingT = TypeVar("FloatingT", bound=Union[float32, float64, float]) +FloatingT = TypeVar("FloatingT", bound=FloatingScalar) FLOAT_TYPES: Final[Tuple[type, ...]] = cast(Tuple[type, ...], FloatingScalar.__args__) # type: ignore[attr-defined] #: Type alias for all scalar types supported by GT4Py Scalar: TypeAlias = Union[BoolScalar, IntegralScalar, FloatingScalar] -ScalarT = TypeVar("ScalarT", bound=Union[BoolScalar, IntegralScalar, FloatingScalar]) +ScalarT = TypeVar("ScalarT", bound=Scalar) SCALAR_TYPES: Final[tuple[type, ...]] = (*BOOL_TYPES, *INTEGRAL_TYPES, *FLOAT_TYPES) @@ -139,7 +140,7 @@ def is_valid_tensor_shape( # -- Data type descriptors -- -class DTypeKind(enum.Enum): +class DTypeKind(eve.StrEnum): """ Kind of a specific data type. @@ -368,7 +369,7 @@ def __gt_origin__(self) -> Tuple[int, ...]: # -- Device representation -- -class DeviceType(enum.Enum): +class DeviceType(enum.IntEnum): """The type of the device where a memory buffer is allocated. 
Enum values taken from DLPack reference implementation at: @@ -385,8 +386,31 @@ class DeviceType(enum.Enum): ROCM = 10 +CPUDeviceTyping: TypeAlias = Literal[DeviceType.CPU] +CUDADeviceTyping: TypeAlias = Literal[DeviceType.CUDA] +CPUPinnedDeviceTyping: TypeAlias = Literal[DeviceType.CPU_PINNED] +OpenCLDeviceTyping: TypeAlias = Literal[DeviceType.OPENCL] +VulkanDeviceTyping: TypeAlias = Literal[DeviceType.VULKAN] +MetalDeviceTyping: TypeAlias = Literal[DeviceType.METAL] +VPIDeviceTyping: TypeAlias = Literal[DeviceType.VPI] +ROCMDeviceTyping: TypeAlias = Literal[DeviceType.ROCM] + + +DeviceTypeT = TypeVar( + "DeviceTypeT", + CPUDeviceTyping, + CUDADeviceTyping, + CPUPinnedDeviceTyping, + OpenCLDeviceTyping, + VulkanDeviceTyping, + MetalDeviceTyping, + VPIDeviceTyping, + ROCMDeviceTyping, +) + + @dataclasses.dataclass(frozen=True) -class Device: +class Device(Generic[DeviceTypeT]): """ Representation of a computing device. @@ -397,10 +421,10 @@ class Device: core number, for `DeviceType.CUDA` it could be the CUDA device number, etc. """ - device_type: DeviceType + device_type: DeviceTypeT device_id: int - def __iter__(self) -> Iterator[DeviceType | int]: + def __iter__(self) -> Iterator[DeviceTypeT | int]: yield self.device_type yield self.device_id @@ -409,7 +433,7 @@ def __iter__(self) -> Iterator[DeviceType | int]: SliceLike = Union[int, Tuple[int, ...], None, slice, "NDArrayObject"] -class NDArrayObjectProto(Protocol): +class NDArrayObject(Protocol): @property def ndim(self) -> int: ... @@ -422,7 +446,7 @@ def shape(self) -> tuple[int, ...]: def dtype(self) -> Any: ... - def __getitem__(self, item: SliceLike) -> NDArrayObject: + def __getitem__(self, item: Any) -> NDArrayObject: ... def __abs__(self) -> NDArrayObject: @@ -434,38 +458,32 @@ def __neg__(self) -> NDArrayObject: def __add__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ... - def __radd__(self, other: NDArrayObject | Scalar) -> NDArrayObject: + def __radd__(self, other: Any) -> NDArrayObject: ... def __sub__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ... - def __rsub__(self, other: NDArrayObject | Scalar) -> NDArrayObject: + def __rsub__(self, other: Any) -> NDArrayObject: ... def __mul__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ... - def __rmul__(self, other: NDArrayObject | Scalar) -> NDArrayObject: + def __rmul__(self, other: Any) -> NDArrayObject: ... def __floordiv__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ... - def __rfloordiv__(self, other: NDArrayObject | Scalar) -> NDArrayObject: + def __rfloordiv__(self, other: Any) -> NDArrayObject: ... def __truediv__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ... - def __rtruediv__(self, other: NDArrayObject | Scalar) -> NDArrayObject: + def __rtruediv__(self, other: Any) -> NDArrayObject: ... def __pow__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ... 
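With `NDArrayObject` defined as a structural `Protocol` (replacing the `Union` over concrete array types removed just below), any array implementation that provides these attributes conforms automatically, without `gt4py` having to import or enumerate it. A minimal sketch, assuming only `numpy`:

```python
import numpy as np

from gt4py._core import definitions as core_defs

# numpy arrays satisfy the protocol structurally; cupy or jax arrays would
# type-check the same way without being listed in any Union.
x: core_defs.NDArrayObject = np.arange(3.0)
y = -x + 1.0  # exercises `__neg__` and `__add__` from the protocol surface
print(y.ndim, y.shape, y.dtype)
```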
- - -NDArrayObject = Union[npt.NDArray, "CuPyNDArray", "JaxNDArray", NDArrayObjectProto] -NDArrayObjectT = TypeVar( - "NDArrayObjectT", npt.NDArray, "CuPyNDArray", "JaxNDArray", NDArrayObjectProto, covariant=True -) diff --git a/src/gt4py/eve/codegen.py b/src/gt4py/eve/codegen.py index 76fea347f0..3a964c92a9 100644 --- a/src/gt4py/eve/codegen.py +++ b/src/gt4py/eve/codegen.py @@ -155,7 +155,7 @@ def format_cpp_source( ) -> str: """Format C++ source code using clang-format.""" assert isinstance(_CLANG_FORMAT_EXECUTABLE, str) - args = [_CLANG_FORMAT_EXECUTABLE] + args = [_CLANG_FORMAT_EXECUTABLE, "--assume-filename=_gt4py_generated_file.cpp"] if style: args.append(f"--style={style}") if fallback_style: diff --git a/src/gt4py/eve/extended_typing.py b/src/gt4py/eve/extended_typing.py index 3b8373ade1..17462a37ff 100644 --- a/src/gt4py/eve/extended_typing.py +++ b/src/gt4py/eve/extended_typing.py @@ -36,6 +36,7 @@ from typing import * # noqa: F403 from typing import overload # Only needed to avoid false flake8 errors +import numpy.typing as npt import typing_extensions as _typing_extensions from typing_extensions import * # type: ignore[assignment,no-redef] # noqa: F403 @@ -236,6 +237,21 @@ def hexdigest(self) -> str: # -- Third party protocols -- +class SupportsArray(Protocol): + def __array__(self, dtype: Optional[npt.DTypeLike] = None, /) -> npt.NDArray[Any]: + ... + + +def supports_array(value: Any) -> TypeGuard[SupportsArray]: + return hasattr(value, "__array__") + + +class ArrayInterface(Protocol): + @property + def __array_interface__(self) -> Dict[str, Any]: + ... + + class ArrayInterfaceTypedDict(TypedDict): shape: Tuple[int, ...] typestr: str @@ -248,11 +264,19 @@ class ArrayInterfaceTypedDict(TypedDict): class StrictArrayInterface(Protocol): - __array_interface__: ArrayInterfaceTypedDict + @property + def __array_interface__(self) -> ArrayInterfaceTypedDict: + ... -class ArrayInterface(Protocol): - __array_interface__: Dict[str, Any] +def supports_array_interface(value: Any) -> TypeGuard[ArrayInterface]: + return hasattr(value, "__array_interface__") + + +class CUDAArrayInterface(Protocol): + @property + def __cuda_array_interface__(self) -> Dict[str, Any]: + ... class CUDAArrayInterfaceTypedDict(TypedDict): @@ -267,25 +291,45 @@ class CUDAArrayInterfaceTypedDict(TypedDict): class StrictCUDAArrayInterface(Protocol): - __cuda_array_interface__: CUDAArrayInterfaceTypedDict + @property + def __cuda_array_interface__(self) -> CUDAArrayInterfaceTypedDict: + ... -class CUDAArrayInterface(Protocol): - __cuda_array_interface__: Dict[str, Any] +def supports_cuda_array_interface(value: Any) -> TypeGuard[CUDAArrayInterface]: + """Check if the given value supports the CUDA Array Interface.""" + return hasattr(value, "__cuda_array_interface__") -PyCapsule = NewType("PyCapsule", object) DLPackDevice = Tuple[int, int] -class DLPackBuffer(Protocol): - def __dlpack__(self, stream: Optional[int] = None) -> PyCapsule: +class MultiStreamDLPackBuffer(Protocol): + def __dlpack__(self, *, stream: Optional[int] = None) -> Any: + ... + + def __dlpack_device__(self) -> DLPackDevice: + ... + + +class SingleStreamDLPackBuffer(Protocol): + def __dlpack__(self, *, stream: None = None) -> Any: ... def __dlpack_device__(self) -> DLPackDevice: ... 
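The `supports_*` helpers introduced here (and `supports_dlpack` just below) are `TypeGuard` predicates, so a plain attribute check also narrows the static type of the tested object. A usage sketch, assuming `numpy` (which implements `__array__` and, since v1.22, the DLPack dunders):

```python
import numpy as np

from gt4py.eve import extended_typing as xtyping

buf = np.zeros(4)
if xtyping.supports_array(buf):
    arr = buf.__array__()  # checkers now treat `buf` as `SupportsArray`
if xtyping.supports_dlpack(buf):
    dev = buf.__dlpack_device__()  # (1, 0), i.e. (kDLCPU, device id 0)
```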
+DLPackBuffer: TypeAlias = Union[MultiStreamDLPackBuffer, SingleStreamDLPackBuffer] + + +def supports_dlpack(value: Any) -> TypeGuard[DLPackBuffer]: + """Check if a given object supports the DLPack protocol.""" + return callable(getattr(value, "__dlpack__", None)) and callable( + getattr(value, "__dlpack_device__", None) + ) + + class DevToolsPrettyPrintable(Protocol): """Used by python-devtools (https://python-devtools.helpmanual.io/).""" diff --git a/src/gt4py/eve/utils.py b/src/gt4py/eve/utils.py index bdbc34f445..7104f7658f 100644 --- a/src/gt4py/eve/utils.py +++ b/src/gt4py/eve/utils.py @@ -228,6 +228,59 @@ def itemgetter_(key: Any, default: Any = NOTHING) -> Callable[[Any], Any]: _P = ParamSpec("_P") +_T = TypeVar("_T") + + +class fluid_partial(functools.partial): + """Create a `functools.partial` with support for multiple applications calling `.partial()`.""" + + def partial(self, *args: Any, **kwargs: Any) -> fluid_partial: + return fluid_partial(self, *args, **kwargs) + + +@overload +def with_fluid_partial( + func: Literal[None] = None, *args: Any, **kwargs: Any +) -> Callable[[Callable[_P, _T]], Callable[_P, _T]]: + ... + + +@overload +def with_fluid_partial( # noqa: F811 # redefinition of unused function + func: Callable[_P, _T], *args: Any, **kwargs: Any +) -> Callable[_P, _T]: + ... + + +def with_fluid_partial( # noqa: F811 # redefinition of unused function + func: Optional[Callable[..., Any]] = None, *args: Any, **kwargs: Any +) -> Union[Callable[..., Any], Callable[[Callable[..., Any]], Callable[..., Any]]]: + """Add a `partial` attribute to the decorated function. + + The `partial` attribute is a function that behaves like `functools.partial`, + but also supports partial application of the decorated function. It can be + used both as a bare or a parameterized decorator. + + Arguments: + func: The function to decorate. + + Returns: + Returns the decorated function with an extra `.partial()` attribute. + + Example: + >>> @with_fluid_partial + ... def add(a, b): + ... return a + b + ... + >>> add.partial(1)(2) + 3 + """ + + def _decorator(func: Callable[..., Any]) -> Callable[..., Any]: + func.partial = fluid_partial(functools.partial, func, *args, **kwargs) # type: ignore[attr-defined] # add attribute + return func + + return _decorator(func) if func is not None else _decorator @overload @@ -318,9 +371,6 @@ def _decorator(base_cls: Type) -> Type: return _decorator -_T = TypeVar("_T") - - def noninstantiable(cls: Type[_T]) -> Type[_T]: """Make a class without abstract method non-instantiable (subclasses should be instantiable).""" if not isinstance(cls, type): diff --git a/src/gt4py/next/__init__.py b/src/gt4py/next/__init__.py index cc35899668..696c4f174c 100644 --- a/src/gt4py/next/__init__.py +++ b/src/gt4py/next/__init__.py @@ -25,6 +25,7 @@ from . 
import common, ffront, iterator, program_processors, type_inference from .common import Dimension, DimensionKind, Field, GridType +from .constructors import as_field, empty, full, ones, zeros from .embedded import ( # Just for registering field implementations nd_array_field as _nd_array_field, ) @@ -52,6 +53,12 @@ "DimensionKind", "Field", "GridType", + # from constructors + "empty", + "zeros", + "ones", + "full", + "as_field", # from iterator "NeighborTableOffsetProvider", "StridedNeighborOffsetProvider", diff --git a/src/gt4py/next/allocators.py b/src/gt4py/next/allocators.py new file mode 100644 index 0000000000..58600d8cda --- /dev/null +++ b/src/gt4py/next/allocators.py @@ -0,0 +1,349 @@ +# GT4Py - GridTools Framework +# +# Copyright (c) 2014-2023, ETH Zurich +# All rights reserved. +# +# This file is part of the GT4Py project and the GridTools framework. +# GT4Py is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check . +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import abc +import dataclasses + +import numpy as np + +import gt4py._core.definitions as core_defs +import gt4py.next.common as common +import gt4py.storage.allocators as core_allocators +from gt4py.eve.extended_typing import ( + TYPE_CHECKING, + Any, + Callable, + Final, + Literal, + Optional, + Protocol, + Sequence, + TypeAlias, + TypeGuard, + cast, +) + + +try: + import cupy as cp +except ImportError: + cp = None + + +CUPY_DEVICE: Final[Literal[None, core_defs.DeviceType.CUDA, core_defs.DeviceType.ROCM]] = ( + None + if not cp + else (core_defs.DeviceType.ROCM if cp.cuda.get_hipcc_path() else core_defs.DeviceType.CUDA) +) + + +FieldLayoutMapper: TypeAlias = Callable[ + [Sequence[common.Dimension]], core_allocators.BufferLayoutMap +] + + +class FieldBufferAllocatorProtocol(Protocol[core_defs.DeviceTypeT]): + """Protocol for buffer allocators used to allocate memory for fields with a given domain.""" + + @property + @abc.abstractmethod + def __gt_device_type__(self) -> core_defs.DeviceTypeT: + ... + + @abc.abstractmethod + def __gt_allocate__( + self, + domain: common.Domain, + dtype: core_defs.DType[core_defs.ScalarT], + device_id: int = 0, + aligned_index: Optional[Sequence[common.NamedIndex]] = None, # absolute position + ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]: + ... + + +def is_field_allocator(obj: Any) -> TypeGuard[FieldBufferAllocatorProtocol]: + return hasattr(obj, "__gt_device_type__") and hasattr(obj, "__gt_allocate__") + + +def is_field_allocator_for( + obj: Any, device: core_defs.DeviceTypeT +) -> TypeGuard[FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]]: + return is_field_allocator(obj) and obj.__gt_device_type__ is device + + +class FieldBufferAllocatorFactoryProtocol(Protocol[core_defs.DeviceTypeT]): + """Protocol for device-specific buffer allocator factories for fields.""" + + @property + @abc.abstractmethod + def __gt_allocator__(self) -> FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]: + ... 
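An allocator factory therefore only needs to expose a `__gt_allocator__` property; this is exactly how backends advertise their allocator. A minimal sketch (the class name `MyBackend` is hypothetical; `StandardCPUFieldBufferAllocator` is defined further down in this module):

```python
import dataclasses

import gt4py._core.definitions as core_defs
import gt4py.next.allocators as next_allocators


@dataclasses.dataclass(frozen=True)
class MyBackend:  # hypothetical stand-in for a real backend object
    @property
    def __gt_allocator__(self) -> next_allocators.FieldBufferAllocatorProtocol:
        return next_allocators.StandardCPUFieldBufferAllocator()


backend = MyBackend()
assert next_allocators.is_field_allocator_factory(backend)
assert next_allocators.get_allocator(backend).__gt_device_type__ is core_defs.DeviceType.CPU
```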
+
+
+def is_field_allocator_factory(obj: Any) -> TypeGuard[FieldBufferAllocatorFactoryProtocol]:
+    return hasattr(obj, "__gt_allocator__")
+
+
+def is_field_allocator_factory_for(
+    obj: Any, device: core_defs.DeviceTypeT
+) -> TypeGuard[FieldBufferAllocatorFactoryProtocol[core_defs.DeviceTypeT]]:
+    return is_field_allocator_factory(obj) and obj.__gt_allocator__.__gt_device_type__ is device
+
+
+FieldBufferAllocationUtil = (
+    FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]
+    | FieldBufferAllocatorFactoryProtocol[core_defs.DeviceTypeT]
+)
+
+
+def is_field_allocation_tool(obj: Any) -> TypeGuard[FieldBufferAllocationUtil]:
+    return is_field_allocator(obj) or is_field_allocator_factory(obj)
+
+
+def get_allocator(
+    obj: Any,
+    *,
+    default: Optional[FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]] = None,
+    strict: bool = False,
+) -> Optional[FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]]:
+    """
+    Return a field buffer allocator from an object assumed to be an allocator or an allocator factory.
+
+    A default allocator can be provided as a fallback in case `obj` is neither an allocator nor a factory.
+
+    Arguments:
+        obj: The allocator or allocator factory.
+        default: Fallback allocator.
+        strict: If `True`, raise an exception if there is no way to get a valid allocator
+            from `obj` or `default`.
+
+    Returns:
+        A field buffer allocator.
+
+    Raises:
+        TypeError: If `obj` is neither a field allocator nor a field allocator factory and no default
+            is provided in `strict` mode.
+    """
+    if is_field_allocator(obj):
+        return obj
+    elif is_field_allocator_factory(obj):
+        return obj.__gt_allocator__
+    elif not strict or is_field_allocator(default):
+        return default
+    else:
+        raise TypeError(f"Object {obj} is neither a field allocator nor a field allocator factory")
+
+
+@dataclasses.dataclass(frozen=True)
+class BaseFieldBufferAllocator(FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]):
+    """Parametrizable field buffer allocator base class."""
+
+    device_type: core_defs.DeviceTypeT
+    array_ns: core_allocators.ValidNumPyLikeAllocationNS
+    layout_mapper: FieldLayoutMapper
+    byte_alignment: int
+
+    @property
+    def __gt_device_type__(self) -> core_defs.DeviceTypeT:
+        return self.device_type
+
+    def __gt_allocate__(
+        self,
+        domain: common.Domain,
+        dtype: core_defs.DType[core_defs.ScalarT],
+        device_id: int = 0,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,  # absolute position
+    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+        shape = domain.shape
+        layout_map = self.layout_mapper(domain.dims)
+        # TODO(egparedes): add support for non-empty aligned index values
+        assert aligned_index is None
+
+        return core_allocators.NDArrayBufferAllocator(self.device_type, self.array_ns).allocate(
+            shape, dtype, device_id, layout_map, self.byte_alignment, aligned_index
+        )
+
+
+if TYPE_CHECKING:
+    __TensorFieldAllocatorAsFieldAllocatorInterfaceT: type[
+        FieldBufferAllocatorProtocol
+    ] = BaseFieldBufferAllocator
+
+
+def horizontal_first_layout_mapper(
+    dims: Sequence[common.Dimension],
+) -> core_allocators.BufferLayoutMap:
+    """Map dimensions to a buffer layout making horizontal dims change the slowest (i.e.
larger strides).""" + + def pos_of_kind(kind: common.DimensionKind) -> list[int]: + return [i for i, dim in enumerate(dims) if dim.kind == kind] + + horizontals = pos_of_kind(common.DimensionKind.HORIZONTAL) + verticals = pos_of_kind(common.DimensionKind.VERTICAL) + locals_ = pos_of_kind(common.DimensionKind.LOCAL) + + layout_map = [0] * len(dims) + for i, pos in enumerate(horizontals + verticals + locals_): + layout_map[pos] = len(dims) - 1 - i + + valid_layout_map = tuple(layout_map) + assert core_allocators.is_valid_layout_map(valid_layout_map) + + return valid_layout_map + + +if TYPE_CHECKING: + __horizontal_first_layout_mapper: FieldLayoutMapper = horizontal_first_layout_mapper + + +#: Registry of default allocators for each device type. +device_allocators: dict[core_defs.DeviceType, FieldBufferAllocatorProtocol] = {} + + +assert core_allocators.is_valid_nplike_allocation_ns(np) +np_alloc_ns: core_allocators.ValidNumPyLikeAllocationNS = np # Just for static type checking + + +class StandardCPUFieldBufferAllocator(BaseFieldBufferAllocator[core_defs.CPUDeviceTyping]): + """A field buffer allocator for CPU devices that uses a horizontal-first layout mapper and 64-byte alignment.""" + + def __init__(self) -> None: + super().__init__( + device_type=core_defs.DeviceType.CPU, + array_ns=np_alloc_ns, + layout_mapper=horizontal_first_layout_mapper, + byte_alignment=64, + ) + + +device_allocators[core_defs.DeviceType.CPU] = StandardCPUFieldBufferAllocator() + +assert is_field_allocator(device_allocators[core_defs.DeviceType.CPU]) + + +@dataclasses.dataclass(frozen=True) +class InvalidFieldBufferAllocator(FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]): + """A field buffer allocator that always raises an exception.""" + + device_type: core_defs.DeviceTypeT + exception: Exception + + @property + def __gt_device_type__(self) -> core_defs.DeviceTypeT: + return self.device_type + + def __gt_allocate__( + self, + domain: common.Domain, + dtype: core_defs.DType[core_defs.ScalarT], + device_id: int = 0, + aligned_index: Optional[Sequence[common.NamedIndex]] = None, # absolute position + ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]: + raise self.exception + + +if CUPY_DEVICE is not None: + cp_alloc_ns: core_allocators.ValidNumPyLikeAllocationNS = cp # Just for static type checking + assert core_allocators.is_valid_nplike_allocation_ns(cp_alloc_ns) + + if CUPY_DEVICE is core_defs.DeviceType.CUDA: + + class CUDAFieldBufferAllocator(BaseFieldBufferAllocator[core_defs.CUDADeviceTyping]): + def __init__(self) -> None: + super().__init__( + device_type=core_defs.DeviceType.CUDA, + array_ns=cp_alloc_ns, + layout_mapper=horizontal_first_layout_mapper, + byte_alignment=128, + ) + + device_allocators[core_defs.DeviceType.CUDA] = CUDAFieldBufferAllocator() + + else: + + class ROCMFieldBufferAllocator(BaseFieldBufferAllocator[core_defs.ROCMDeviceTyping]): + def __init__(self) -> None: + super().__init__( + device_type=core_defs.DeviceType.ROCM, + array_ns=cp_alloc_ns, + layout_mapper=horizontal_first_layout_mapper, + byte_alignment=128, + ) + + device_allocators[core_defs.DeviceType.ROCM] = ROCMFieldBufferAllocator() + +else: + + class InvalidGPUFielBufferAllocator(InvalidFieldBufferAllocator[core_defs.CUDADeviceTyping]): + def __init__(self) -> None: + super().__init__( + device_type=core_defs.DeviceType.CUDA, + exception=RuntimeError("Missing `cupy` dependency for GPU allocation"), + ) + + +StandardGPUFieldBufferAllocator: Final[type[FieldBufferAllocatorProtocol]] = cast( + 
type[FieldBufferAllocatorProtocol], + type(device_allocators[CUPY_DEVICE]) if CUPY_DEVICE else InvalidGPUFielBufferAllocator, +) + + +def allocate( + domain: common.DomainLike, + dtype: core_defs.DType[core_defs.ScalarT], + *, + aligned_index: Optional[Sequence[common.NamedIndex]] = None, + allocator: Optional[FieldBufferAllocationUtil] = None, + device: Optional[core_defs.Device] = None, +) -> core_allocators.TensorBuffer: + """ + Allocate a TensorBuffer for the given domain and device or allocator. + + The arguments `device` and `allocator` are mutually exclusive. + If `device` is specified, the corresponding default allocator + (defined in :data:`device_allocators`) is used. + + Arguments: + domain: The domain which should be backed by the allocated tensor buffer. + dtype: Data type. + aligned_index: N-dimensional index of the first aligned element + allocator: The allocator to use for the allocation. + device: The device to allocate the tensor buffer on (using the default + allocator for this kind of device from :data:`device_allocators`). + + Returns: + The allocated tensor buffer. + + Raises: + ValueError + If illegal or inconsistent arguments are specified. + + """ + if device is None and allocator is None: + raise ValueError("No 'device' or 'allocator' specified") + actual_allocator = get_allocator(allocator) + if actual_allocator is None: + assert device is not None # for mypy + actual_allocator = device_allocators[device.device_type] + elif device is None: + device = core_defs.Device(actual_allocator.__gt_device_type__, 0) + elif device.device_type != actual_allocator.__gt_device_type__: + raise ValueError(f"Device {device} and allocator {actual_allocator} are incompatible") + + return actual_allocator.__gt_allocate__( + domain=common.domain(domain), + dtype=dtype, + device_id=device.device_id, + aligned_index=aligned_index, + ) diff --git a/src/gt4py/next/common.py b/src/gt4py/next/common.py index b85239cd0a..ffaa410563 100644 --- a/src/gt4py/next/common.py +++ b/src/gt4py/next/common.py @@ -80,15 +80,18 @@ def __str__(self): return f"{self.value}[{self.kind}]" -@dataclasses.dataclass(frozen=True) +@dataclasses.dataclass(frozen=True, init=False) class UnitRange(Sequence[int], Set[int]): """Range from `start` to `stop` with step size one.""" start: int stop: int - def __post_init__(self): - if self.stop <= self.start: + def __init__(self, start: core_defs.IntegralScalar, stop: core_defs.IntegralScalar) -> None: + if start < stop: + object.__setattr__(self, "start", int(start)) + object.__setattr__(self, "stop", int(stop)) + else: # make UnitRange(0,0) the single empty UnitRange object.__setattr__(self, "start", 0) object.__setattr__(self, "stop", 0) @@ -142,7 +145,12 @@ def __str__(self) -> str: return f"({self.start}:{self.stop})" -RangeLike: TypeAlias = UnitRange | range | tuple[int, int] +RangeLike: TypeAlias = ( + UnitRange + | range + | tuple[core_defs.IntegralScalar, core_defs.IntegralScalar] + | core_defs.IntegralScalar +) def unit_range(r: RangeLike) -> UnitRange: @@ -152,9 +160,17 @@ def unit_range(r: RangeLike) -> UnitRange: if r.step != 1: raise ValueError(f"`UnitRange` requires step size 1, got `{r.step}`.") return UnitRange(r.start, r.stop) - if isinstance(r, tuple) and isinstance(r[0], int) and isinstance(r[1], int): + # TODO(egparedes): use core_defs.IntegralScalar for `isinstance()` checks (see PEP 604) + # once the related mypy bug (#16358) gets fixed + if ( + isinstance(r, tuple) + and isinstance(r[0], core_defs.INTEGRAL_TYPES) + and isinstance(r[1], 
core_defs.INTEGRAL_TYPES) + ): return UnitRange(r[0], r[1]) - raise ValueError(f"`{r}` cannot be interpreted as `UnitRange`.") + if isinstance(r, core_defs.INTEGRAL_TYPES): + return UnitRange(0, cast(core_defs.IntegralScalar, r)) + raise ValueError(f"`{r!r}` cannot be interpreted as `UnitRange`.") IntIndex: TypeAlias = int | core_defs.IntegralScalar @@ -274,6 +290,10 @@ def __init__( def __len__(self) -> int: return len(self.ranges) + @property + def shape(self) -> tuple[int, ...]: + return tuple(len(r) for r in self.ranges) + @overload def __getitem__(self, index: int) -> NamedRange: ... @@ -350,12 +370,23 @@ def domain(domain_like: DomainLike) -> Domain: >>> domain({I: (2, 4), J: (3, 5)}) Domain(dims=(Dimension(value='I', kind=), Dimension(value='J', kind=)), ranges=(UnitRange(2, 4), UnitRange(3, 5))) + + >>> domain(((I, 2), (J, 4))) + Domain(dims=(Dimension(value='I', kind=), Dimension(value='J', kind=)), ranges=(UnitRange(0, 2), UnitRange(0, 4))) + + >>> domain({I: 2, J: 4}) + Domain(dims=(Dimension(value='I', kind=), Dimension(value='J', kind=)), ranges=(UnitRange(0, 2), UnitRange(0, 4))) """ if isinstance(domain_like, Domain): return domain_like if isinstance(domain_like, Sequence): return Domain(*tuple(named_range(d) for d in domain_like)) if isinstance(domain_like, Mapping): + if all(isinstance(elem, core_defs.INTEGRAL_TYPES) for elem in domain_like.values()): + return Domain( + dims=tuple(domain_like.keys()), + ranges=tuple(UnitRange(0, s) for s in domain_like.values()), # type: ignore[arg-type] # type of `s` is checked in condition + ) return Domain( dims=tuple(domain_like.keys()), ranges=tuple(unit_range(r) for r in domain_like.values()), @@ -383,20 +414,30 @@ def __call__(self, func: fbuiltins.BuiltInFunction[_R, _P], /) -> Callable[_P, _ ... +# TODO(havogt): replace this protocol with the new `GTFieldInterface` protocol class NextGTDimsInterface(Protocol): """ - A `GTDimsInterface` is an object providing the `__gt_dims__` property, naming :class:`Field` dimensions. + Protocol for objects providing the `__gt_dims__` property, naming :class:`Field` dimensions. - The dimension names are objects of type :class:`Dimension`, in contrast to :mod:`gt4py.cartesian`, - where the labels are `str` s with implied semantics, see :class:`~gt4py._core.definitions.GTDimsInterface` . + The dimension names are objects of type :class:`Dimension`, in contrast to + :mod:`gt4py.cartesian`, where the labels are `str` s with implied semantics, + see :class:`~gt4py._core.definitions.GTDimsInterface` . """ - # TODO(havogt): unify with GTDimsInterface, ideally in backward compatible way @property def __gt_dims__(self) -> tuple[Dimension, ...]: ... +# TODO(egparedes): add support for this new protocol in the cartesian module +class GTFieldInterface(Protocol): + """Protocol for object providing the `__gt_domain__` property, specifying the :class:`Domain` of a :class:`Field`.""" + + @property + def __gt_domain__(self) -> Domain: + ... 
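For illustration, a minimal container satisfying `GTFieldInterface` only needs to hand out its `Domain`; a sketch (the class name `PlainData` is hypothetical, and `Domain.shape` is the property added earlier in this diff):

```python
import dataclasses

import numpy as np

from gt4py.next import common


@dataclasses.dataclass(frozen=True)
class PlainData:  # hypothetical example type
    buffer: np.ndarray
    domain: common.Domain

    @property
    def __gt_domain__(self) -> common.Domain:
        return self.domain


I = common.Dimension("I")
data = PlainData(np.zeros(4), common.domain({I: 4}))
assert data.__gt_domain__.shape == (4,)
```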
+
+
 @extended_runtime_checkable
 class Field(NextGTDimsInterface, core_defs.GTOriginInterface, Protocol[DimsT, core_defs.ScalarT]):
     __gt_builtin_func__: ClassVar[GTBuiltInFuncDispatcher]
@@ -671,7 +712,7 @@ class FieldBuiltinFuncRegistry:
     def __init_subclass__(cls, **kwargs):
         cls._builtin_func_map = collections.ChainMap(
-            {},  # New empty `dict`` for new registrations on this class
+            {},  # New empty `dict` for new registrations on this class
             *[
                 c.__dict__["_builtin_func_map"].maps[0]  # adding parent `dict`s in mro order
                 for c in cls.__mro__
diff --git a/src/gt4py/next/constructors.py b/src/gt4py/next/constructors.py
new file mode 100644
index 0000000000..30ef8452aa
--- /dev/null
+++ b/src/gt4py/next/constructors.py
@@ -0,0 +1,297 @@
+# GT4Py - GridTools Framework
+#
+# Copyright (c) 2014-2023, ETH Zurich
+# All rights reserved.
+#
+# This file is part of the GT4Py project and the GridTools framework.
+# GT4Py is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or any later
+# version. See the LICENSE.txt file at the top-level directory of this
+# distribution for a copy of the license or check .
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+from __future__ import annotations
+
+from collections.abc import Mapping, Sequence
+from typing import Optional, cast
+
+import gt4py._core.definitions as core_defs
+import gt4py.eve as eve
+import gt4py.eve.extended_typing as xtyping
+import gt4py.next.allocators as next_allocators
+import gt4py.next.common as common
+import gt4py.next.embedded.nd_array_field as nd_array_field
+import gt4py.storage.cartesian.utils as storage_utils
+
+
+@eve.utils.with_fluid_partial
+def empty(
+    domain: common.DomainLike,
+    dtype: core_defs.DTypeLike = core_defs.Float64DType(()),
+    *,
+    aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    allocator: Optional[next_allocators.FieldBufferAllocationUtil] = None,
+    device: Optional[core_defs.Device] = None,
+) -> nd_array_field.NdArrayField:
+    """Create a `Field` of uninitialized (undefined) values using the given (or device-default) allocator.
+
+    This function supports partial binding of arguments, see :class:`eve.utils.partial` for details.
+
+    Arguments:
+        domain: Definition of the domain of the field (which fixes the shape of the allocated field buffer).
+            See :class:`gt4py.next.common.Domain` for details.
+        dtype: Definition of the data type of the field. Defaults to `float64`.
+
+    Keyword Arguments:
+        aligned_index: Index in the definition domain which should be used as reference
+            point for memory alignment computations. It can be set to the most common origin
+            of computations in this domain (if known) for performance reasons.
+        allocator: The allocator or allocator factory (e.g. backend) used for memory buffer
+            allocation, which knows how to optimize the memory layout for a given device.
+            Required if `device` is `None`. If both are valid, `allocator` will be chosen over
+            the default device allocator.
+        device: The device (CPU, type of accelerator) to optimize the memory layout for.
+            Required if `allocator` is `None` and will cause the default device allocator
+            to be used in that case.
+
+    Returns:
+        A field, backed by a buffer with memory layout as specified by allocator and alignment requirements.
+
+    Raises:
+        ValueError
+            If illegal or inconsistent arguments are specified.
+ + Examples: + Initialize a field in one dimension with a backend and a range domain: + + >>> from gt4py import next as gtx + >>> from gt4py.next.program_processors.runners import roundtrip + >>> IDim = gtx.Dimension("I") + >>> a = gtx.empty({IDim: range(3, 10)}, allocator=roundtrip.backend) + >>> a.shape + (7,) + + Initialize with a device and an integer domain. It works like a shape with named dimensions: + + >>> from gt4py._core import definitions as core_defs + >>> JDim = gtx.Dimension("J") + >>> b = gtx.empty({IDim: 3, JDim: 3}, int, device=core_defs.Device(core_defs.DeviceType.CPU, 0)) + >>> b.shape + (3, 3) + """ + dtype = core_defs.dtype(dtype) + buffer = next_allocators.allocate( + domain, dtype, aligned_index=aligned_index, allocator=allocator, device=device + ) + res = common.field(buffer.ndarray, domain=domain) + assert common.is_mutable_field(res) + assert isinstance(res, nd_array_field.NdArrayField) + return res + + +@eve.utils.with_fluid_partial +def zeros( + domain: common.DomainLike, + dtype: core_defs.DTypeLike = core_defs.Float64DType(()), + *, + aligned_index: Optional[Sequence[common.NamedIndex]] = None, + allocator: Optional[next_allocators.FieldBufferAllocatorProtocol] = None, + device: Optional[core_defs.Device] = None, +) -> nd_array_field.NdArrayField: + """Create a Field containing all zeros using the given (or device-default) allocator. + + This function supports partial binding of arguments, see :class:`eve.utils.partial` for details. + See :func:`empty` for further details about the meaning of the arguments. + + Examples: + >>> from gt4py import next as gtx + >>> from gt4py.next.program_processors.runners import roundtrip + >>> IDim = gtx.Dimension("I") + >>> gtx.zeros({IDim: range(3, 10)}, allocator=roundtrip.backend).ndarray + array([0., 0., 0., 0., 0., 0., 0.]) + """ + field = empty( + domain=domain, + dtype=dtype, + aligned_index=aligned_index, + allocator=allocator, + device=device, + ) + field[...] = field.dtype.scalar_type(0) + return field + + +@eve.utils.with_fluid_partial +def ones( + domain: common.DomainLike, + dtype: core_defs.DTypeLike = core_defs.Float64DType(()), + *, + aligned_index: Optional[Sequence[common.NamedIndex]] = None, + allocator: Optional[next_allocators.FieldBufferAllocatorProtocol] = None, + device: Optional[core_defs.Device] = None, +) -> nd_array_field.NdArrayField: + """Create a Field containing all ones using the given (or device-default) allocator. + + This function supports partial binding of arguments, see :class:`eve.utils.partial` for details. + See :func:`empty` for further details about the meaning of the arguments. + + Examples: + >>> from gt4py import next as gtx + >>> from gt4py.next.program_processors.runners import roundtrip + >>> IDim = gtx.Dimension("I") + >>> gtx.ones({IDim: range(3, 10)}, allocator=roundtrip.backend).ndarray + array([1., 1., 1., 1., 1., 1., 1.]) + """ + field = empty( + domain=domain, + dtype=dtype, + aligned_index=aligned_index, + allocator=allocator, + device=device, + ) + field[...] 
= field.dtype.scalar_type(1) + return field + + +@eve.utils.with_fluid_partial +def full( + domain: common.DomainLike, + fill_value: core_defs.Scalar, + dtype: Optional[core_defs.DTypeLike] = None, + *, + aligned_index: Optional[Sequence[common.NamedIndex]] = None, + allocator: Optional[next_allocators.FieldBufferAllocatorProtocol] = None, + device: Optional[core_defs.Device] = None, +) -> nd_array_field.NdArrayField: + """Create a Field where all values are set to `fill_value` using the given (or device-default) allocator. + + This function supports partial binding of arguments, see :class:`eve.utils.partial` for details. + See :func:`empty` for further details about the meaning of the arguments. + + Arguments: + domain: Definition of the domain of the field (and consequently of the shape of the allocated field buffer). + fill_value: Each point in the field will be initialized to this value. + dtype: Definition of the data type of the field. Defaults to the dtype of `fill_value`. + + Examples: + >>> from gt4py import next as gtx + >>> from gt4py.next.program_processors.runners import roundtrip + >>> IDim = gtx.Dimension("I") + >>> gtx.full({IDim: 3}, 5, allocator=roundtrip.backend).ndarray + array([5, 5, 5]) + """ + field = empty( + domain=domain, + dtype=dtype if dtype is not None else core_defs.dtype(type(fill_value)), + aligned_index=aligned_index, + allocator=allocator, + device=device, + ) + field[...] = field.dtype.scalar_type(fill_value) + return field + + +@eve.utils.with_fluid_partial +def as_field( + domain: common.DomainLike | Sequence[common.Dimension], + data: core_defs.NDArrayObject, + dtype: Optional[core_defs.DTypeLike] = None, + *, + origin: Optional[Mapping[common.Dimension, int]] = None, + aligned_index: Optional[Sequence[common.NamedIndex]] = None, + allocator: Optional[next_allocators.FieldBufferAllocatorProtocol] = None, + device: Optional[core_defs.Device] = None, + # copy=False, TODO +) -> nd_array_field.NdArrayField: + """Create a Field from an array-like object using the given (or device-default) allocator. + + This function supports partial binding of arguments, see :class:`eve.utils.partial` for details. + See :func:`empty` for further details about the meaning of the extra keyword arguments. + + Parameters: + domain: Definition of the domain of the field (and consequently of the shape of the allocated field buffer). + In addition to the values allowed in `empty`, it can also just be a sequence of dimensions, + in which case the sizes of each dimension will then be taken from the shape of `data`. + data: Array like data object to initialize the field with + dtype: Definition of the data type of the field. Defaults to the same as `data`. + + Keyword Arguments: + origin: Only allowed if `domain` is a sequence of dimensions. The indicated index in `data` + will be the zero point of the resulting field. + allocator: Fully optional, in contrast to `empty`. + device: Fully optional, in contrast to `empty`, defaults to the same device as `data`. 
+ + Examples: + >>> import numpy as np + >>> from gt4py import next as gtx + >>> IDim = gtx.Dimension("I") + >>> xdata = np.array([1, 2, 3]) + + Automatic domain from just dimensions: + + >>> a = gtx.as_field([IDim], xdata) + >>> a.ndarray + array([1, 2, 3]) + >>> a.domain.ranges[0] + UnitRange(0, 3) + + Shifted domain using origin: + + >>> b = gtx.as_field([IDim], xdata, origin={IDim: 1}) + >>> b.domain.ranges[0] + UnitRange(-1, 2) + + Equivalent domain fully specified: + + >>> gtx.as_field({IDim: range(-1, 2)}, xdata).domain.ranges[0] + UnitRange(-1, 2) + """ + if isinstance(domain, Sequence) and all(isinstance(dim, common.Dimension) for dim in domain): + domain = cast(Sequence[common.Dimension], domain) + if len(domain) != data.ndim: + raise ValueError( + f"Cannot construct `Field` from array of shape `{data.shape}` and domain `{domain}` " + ) + if origin: + domain_dims = set(domain) + if unknown_dims := set(origin.keys()) - domain_dims: + raise ValueError(f"Origin keys {unknown_dims} not in domain {domain}") + else: + origin = {} + actual_domain = common.domain( + [ + (d, (-(start_offset := origin.get(d, 0)), s - start_offset)) + for d, s in zip(domain, data.shape) + ] + ) + else: + if origin: + raise ValueError(f"Cannot specify origin for domain {domain}") + actual_domain = common.domain(cast(common.DomainLike, domain)) + + # TODO(egparedes): allow zero-copy construction (no reallocation) if buffer has + # already the correct layout and device. + shape = storage_utils.asarray(data).shape + if shape != actual_domain.shape: + raise ValueError(f"Cannot construct `Field` from array of shape `{shape}` ") + if dtype is None: + dtype = storage_utils.asarray(data).dtype + dtype = core_defs.dtype(dtype) + assert dtype.tensor_shape == () # TODO + + if allocator is device is None and xtyping.supports_dlpack(data): + device = core_defs.Device(*data.__dlpack_device__()) + + field = empty( + domain=actual_domain, + dtype=dtype, + aligned_index=aligned_index, + allocator=allocator, + device=device, + ) + + field[...] 
= field.array_ns.asarray(data) + + return field diff --git a/src/gt4py/next/embedded/nd_array_field.py b/src/gt4py/next/embedded/nd_array_field.py index fcaa09e7eb..527197e0bc 100644 --- a/src/gt4py/next/embedded/nd_array_field.py +++ b/src/gt4py/next/embedded/nd_array_field.py @@ -40,7 +40,7 @@ def _make_unary_array_field_intrinsic_func(builtin_name: str, array_builtin_name: str) -> Callable: - def _builtin_unary_op(a: _BaseNdArrayField) -> common.Field: + def _builtin_unary_op(a: NdArrayField) -> common.Field: xp = a.__class__.array_ns op = getattr(xp, array_builtin_name) new_data = op(a.ndarray) @@ -52,7 +52,7 @@ def _builtin_unary_op(a: _BaseNdArrayField) -> common.Field: def _make_binary_array_field_intrinsic_func(builtin_name: str, array_builtin_name: str) -> Callable: - def _builtin_binary_op(a: _BaseNdArrayField, b: common.Field) -> common.Field: + def _builtin_binary_op(a: NdArrayField, b: common.Field) -> common.Field: xp = a.__class__.array_ns op = getattr(xp, array_builtin_name) if hasattr(b, "__gt_builtin_func__"): # common.is_field(b): @@ -81,7 +81,7 @@ def _builtin_binary_op(a: _BaseNdArrayField, b: common.Field) -> common.Field: @dataclasses.dataclass(frozen=True) -class _BaseNdArrayField( +class NdArrayField( common.MutableField[common.DimsT, core_defs.ScalarT], common.FieldBuiltinFuncRegistry ): """ @@ -136,7 +136,7 @@ def from_array( *, domain: common.DomainLike, dtype_like: Optional[core_defs.DType] = None, # TODO define DTypeLike - ) -> _BaseNdArrayField: + ) -> NdArrayField: domain = common.domain(domain) xp = cls.array_ns @@ -157,7 +157,7 @@ def from_array( return cls(domain, array) - def remap(self: _BaseNdArrayField, connectivity) -> _BaseNdArrayField: + def remap(self: NdArrayField, connectivity) -> NdArrayField: raise NotImplementedError() def restrict(self, index: common.AnyIndexSpec) -> common.Field | core_defs.ScalarT: @@ -165,7 +165,7 @@ def restrict(self, index: common.AnyIndexSpec) -> common.Field | core_defs.Scala new_buffer = self.ndarray[buffer_slice] if len(new_domain) == 0: - assert core_defs.is_scalar_type(new_buffer) + # TODO: assert core_defs.is_scalar_type(new_buffer), new_buffer return new_buffer # type: ignore[return-value] # I don't think we can express that we return `ScalarT` here else: return self.__class__.from_array(new_buffer, domain=new_domain) @@ -196,7 +196,7 @@ def restrict(self, index: common.AnyIndexSpec) -> common.Field | core_defs.Scala __mod__ = __rmod__ = _make_binary_array_field_intrinsic_func("mod", "mod") - def __and__(self, other: common.Field | core_defs.ScalarT) -> _BaseNdArrayField: + def __and__(self, other: common.Field | core_defs.ScalarT) -> NdArrayField: if self.dtype == core_defs.BoolDType(): return _make_binary_array_field_intrinsic_func("logical_and", "logical_and")( self, other @@ -205,14 +205,14 @@ def __and__(self, other: common.Field | core_defs.ScalarT) -> _BaseNdArrayField: __rand__ = __and__ - def __or__(self, other: common.Field | core_defs.ScalarT) -> _BaseNdArrayField: + def __or__(self, other: common.Field | core_defs.ScalarT) -> NdArrayField: if self.dtype == core_defs.BoolDType(): return _make_binary_array_field_intrinsic_func("logical_or", "logical_or")(self, other) raise NotImplementedError("`__or__` not implemented for non-`bool` fields.") __ror__ = __or__ - def __xor__(self, other: common.Field | core_defs.ScalarT) -> _BaseNdArrayField: + def __xor__(self, other: common.Field | core_defs.ScalarT) -> NdArrayField: if self.dtype == core_defs.BoolDType(): return 
_make_binary_array_field_intrinsic_func("logical_xor", "logical_xor")( self, other @@ -221,7 +221,7 @@ def __xor__(self, other: common.Field | core_defs.ScalarT) -> _BaseNdArrayField: __rxor__ = __xor__ - def __invert__(self) -> _BaseNdArrayField: + def __invert__(self) -> NdArrayField: if self.dtype == core_defs.BoolDType(): return _make_unary_array_field_intrinsic_func("invert", "invert")(self) raise NotImplementedError("`__invert__` not implemented for non-`bool` fields.") @@ -243,8 +243,8 @@ def _slice( # -- Specialized implementations for intrinsic operations on array fields -- -_BaseNdArrayField.register_builtin_func(fbuiltins.abs, _BaseNdArrayField.__abs__) # type: ignore[attr-defined] -_BaseNdArrayField.register_builtin_func(fbuiltins.power, _BaseNdArrayField.__pow__) # type: ignore[attr-defined] +NdArrayField.register_builtin_func(fbuiltins.abs, NdArrayField.__abs__) # type: ignore[attr-defined] +NdArrayField.register_builtin_func(fbuiltins.power, NdArrayField.__pow__) # type: ignore[attr-defined] # TODO gamma for name in ( @@ -254,23 +254,23 @@ def _slice( ): if name in ["abs", "power", "gamma"]: continue - _BaseNdArrayField.register_builtin_func( + NdArrayField.register_builtin_func( getattr(fbuiltins, name), _make_unary_array_field_intrinsic_func(name, name) ) -_BaseNdArrayField.register_builtin_func( +NdArrayField.register_builtin_func( fbuiltins.minimum, _make_binary_array_field_intrinsic_func("minimum", "minimum") # type: ignore[attr-defined] ) -_BaseNdArrayField.register_builtin_func( +NdArrayField.register_builtin_func( fbuiltins.maximum, _make_binary_array_field_intrinsic_func("maximum", "maximum") # type: ignore[attr-defined] ) -_BaseNdArrayField.register_builtin_func( +NdArrayField.register_builtin_func( fbuiltins.fmod, _make_binary_array_field_intrinsic_func("fmod", "fmod") # type: ignore[attr-defined] ) def _np_cp_setitem( - self: _BaseNdArrayField[common.DimsT, core_defs.ScalarT], + self: NdArrayField[common.DimsT, core_defs.ScalarT], index: common.AnyIndexSpec, value: common.Field | core_defs.NDArrayObject | core_defs.ScalarT, ) -> None: @@ -293,7 +293,7 @@ def _np_cp_setitem( @dataclasses.dataclass(frozen=True) -class NumPyArrayField(_BaseNdArrayField): +class NumPyArrayField(NdArrayField): array_ns: ClassVar[ModuleType] = np __setitem__ = _np_cp_setitem @@ -306,7 +306,7 @@ class NumPyArrayField(_BaseNdArrayField): _nd_array_implementations.append(cp) @dataclasses.dataclass(frozen=True) - class CuPyArrayField(_BaseNdArrayField): + class CuPyArrayField(NdArrayField): array_ns: ClassVar[ModuleType] = cp __setitem__ = _np_cp_setitem @@ -318,7 +318,7 @@ class CuPyArrayField(_BaseNdArrayField): _nd_array_implementations.append(jnp) @dataclasses.dataclass(frozen=True) - class JaxArrayField(_BaseNdArrayField): + class JaxArrayField(NdArrayField): array_ns: ClassVar[ModuleType] = jnp def __setitem__( @@ -355,7 +355,7 @@ def _builtins_broadcast( raise AssertionError("Scalar case not reachable from `fbuiltins.broadcast`.") -_BaseNdArrayField.register_builtin_func(fbuiltins.broadcast, _builtins_broadcast) +NdArrayField.register_builtin_func(fbuiltins.broadcast, _builtins_broadcast) def _get_slices_from_domain_slice( diff --git a/src/gt4py/next/ffront/decorator.py b/src/gt4py/next/ffront/decorator.py index 12ab3955ab..2d12331513 100644 --- a/src/gt4py/next/ffront/decorator.py +++ b/src/gt4py/next/ffront/decorator.py @@ -30,8 +30,9 @@ from devtools import debug from gt4py._core import definitions as core_defs +from gt4py.eve import utils as eve_utils from 
gt4py.eve.extended_typing import Any, Optional -from gt4py.eve.utils import UIDGenerator +from gt4py.next import allocators as next_allocators from gt4py.next.common import Dimension, DimensionKind, GridType from gt4py.next.ffront import ( dialect_ast_enums, @@ -214,6 +215,15 @@ def __post_init__(self): f"The following closure variables are undefined: {', '.join(undefined_symbols)}" ) + @functools.cached_property + def __gt_allocator__( + self, + ) -> next_allocators.FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]: + if self.backend: + return self.backend.__gt_allocator__ + else: + raise RuntimeError(f"Program {self} does not have a backend set.") + def with_backend(self, backend: ppi.ProgramExecutor) -> Program: return dataclasses.replace(self, backend=backend) @@ -609,7 +619,7 @@ def as_program( # with the out argument of the program we generate here. loc = self.foast_node.location - param_sym_uids = UIDGenerator() # use a new UID generator to allow caching + param_sym_uids = eve_utils.UIDGenerator() # use a new UID generator to allow caching type_ = self.__gt_type__() params_decl: list[past.Symbol] = [ @@ -790,8 +800,8 @@ def scan_operator( >>> from gt4py.next.iterator import embedded >>> embedded._column_range = 1 # implementation detail >>> KDim = gtx.Dimension("K", kind=gtx.DimensionKind.VERTICAL) - >>> inp = gtx.np_as_located_field(KDim)(np.ones((10,))) - >>> out = gtx.np_as_located_field(KDim)(np.zeros((10,))) + >>> inp = gtx.as_field([KDim], np.ones((10,))) + >>> out = gtx.as_field([KDim], np.zeros((10,))) >>> @gtx.scan_operator(axis=KDim, forward=True, init=0.) ... def scan_operator(carry: float, val: float) -> float: ... return carry+val diff --git a/src/gt4py/next/iterator/embedded.py b/src/gt4py/next/iterator/embedded.py index 3d159eaae7..674f99f61c 100644 --- a/src/gt4py/next/iterator/embedded.py +++ b/src/gt4py/next/iterator/embedded.py @@ -23,6 +23,7 @@ import itertools import math import sys +import warnings from typing import ( Any, Callable, @@ -1015,6 +1016,8 @@ def _shift_field_indices( def np_as_located_field( *axes: common.Dimension, origin: Optional[dict[common.Dimension, int]] = None ) -> Callable[[np.ndarray], common.Field]: + warnings.warn("`np_as_located_field()` is deprecated, use `gtx.as_field()`", DeprecationWarning) + origin = origin or {} def _maker(a) -> common.Field: @@ -1063,7 +1066,7 @@ def dtype(self) -> core_defs.Int32DType: @property def ndarray(self) -> core_defs.NDArrayObject: - return AttributeError("Cannot get `ndarray` of an infinite Field.") + raise AttributeError("Cannot get `ndarray` of an infinite Field.") def remap(self, index_field: common.Field) -> common.Field: # TODO can be implemented by constructing and ndarray (but do we know of which kind?) @@ -1169,7 +1172,7 @@ def dtype(self) -> core_defs.DType[core_defs.ScalarT]: @property def ndarray(self) -> core_defs.NDArrayObject: - return AttributeError("Cannot get `ndarray` of an infinite Field.") + raise AttributeError("Cannot get `ndarray` of an infinite Field.") def remap(self, index_field: common.Field) -> common.Field: # TODO can be implemented by constructing and ndarray (but do we know of which kind?) 
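For reference, the migration implied by the deprecation above is a one-liner. A minimal sketch (illustrative only; `KDim` and `data` are placeholder names, and the `origin` behavior follows the `as_field` docstring earlier in this patch):

```python
import numpy as np
import gt4py.next as gtx

KDim = gtx.Dimension("K", kind=gtx.DimensionKind.VERTICAL)
data = np.ones((10,))

# Deprecated two-step construction (now emits a DeprecationWarning):
inp_old = gtx.np_as_located_field(KDim)(data)

# New single-call construction; `origin` shifts the domain start as before:
inp = gtx.as_field([KDim], data)
shifted = gtx.as_field([KDim], data, origin={KDim: 1})  # domain range starts at -1
```
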
diff --git a/src/gt4py/next/program_processors/formatters/gtfn.py b/src/gt4py/next/program_processors/formatters/gtfn.py index 2952bf3465..f9fa154641 100644 --- a/src/gt4py/next/program_processors/formatters/gtfn.py +++ b/src/gt4py/next/program_processors/formatters/gtfn.py @@ -20,5 +20,5 @@ @program_formatter -def format_sourcecode(program: itir.FencilDefinition, *arg: Any, **kwargs: Any) -> str: +def format_cpp(program: itir.FencilDefinition, *args: Any, **kwargs: Any) -> str: return generate(program, **kwargs) diff --git a/src/gt4py/next/program_processors/formatters/pretty_print.py b/src/gt4py/next/program_processors/formatters/pretty_print.py index b6afb88759..4f4a15f908 100644 --- a/src/gt4py/next/program_processors/formatters/pretty_print.py +++ b/src/gt4py/next/program_processors/formatters/pretty_print.py @@ -14,15 +14,23 @@ from typing import Any -from gt4py.next.iterator import ir as itir -from gt4py.next.iterator.pretty_parser import pparse -from gt4py.next.iterator.pretty_printer import pformat -from gt4py.next.program_processors.processor_interface import program_formatter +import gt4py.eve as eve +import gt4py.next.iterator.ir as itir +import gt4py.next.iterator.pretty_parser as pretty_parser +import gt4py.next.iterator.pretty_printer as pretty_printer +import gt4py.next.program_processors.processor_interface as ppi -@program_formatter -def pretty_format_and_check(program: itir.FencilDefinition, *args: Any, **kwargs: Any) -> str: - pretty = pformat(program) - parsed = pparse(pretty) - assert parsed == program +class _RemoveITIRSymTypes(eve.NodeTranslator): + def visit_Sym(self, node: itir.Sym) -> itir.Sym: + return itir.Sym(id=node.id, dtype=None, kind=None) + + +@ppi.program_formatter +def format_itir_and_check(program: itir.FencilDefinition, *args: Any, **kwargs: Any) -> str: + # remove types from ITIR as they are not supported for the roundtrip + root = _RemoveITIRSymTypes().visit(program) + pretty = pretty_printer.pformat(root) + parsed = pretty_parser.pparse(pretty) + assert parsed == root return pretty diff --git a/src/gt4py/next/program_processors/formatters/type_check.py b/src/gt4py/next/program_processors/formatters/type_check.py index 07cbc89ebd..8f17b8cf98 100644 --- a/src/gt4py/next/program_processors/formatters/type_check.py +++ b/src/gt4py/next/program_processors/formatters/type_check.py @@ -18,7 +18,7 @@ @program_formatter -def check(program: itir.FencilDefinition, *args, **kwargs) -> str: +def check_type_inference(program: itir.FencilDefinition, *args, **kwargs) -> str: type_inference.pprint(type_inference.infer(program, offset_provider=kwargs["offset_provider"])) transformed = apply_common_transforms( program, lift_mode=kwargs.get("lift_mode"), offset_provider=kwargs["offset_provider"] diff --git a/src/gt4py/next/program_processors/otf_compile_executor.py b/src/gt4py/next/program_processors/otf_compile_executor.py index cd08c16933..8dff34a35d 100644 --- a/src/gt4py/next/program_processors/otf_compile_executor.py +++ b/src/gt4py/next/program_processors/otf_compile_executor.py @@ -12,12 +12,16 @@ # # SPDX-License-Identifier: GPL-3.0-or-later +from __future__ import annotations + import dataclasses from typing import Any, Generic, Optional, TypeVar -from gt4py.next.iterator import ir as itir +import gt4py._core.definitions as core_defs +import gt4py.next.allocators as next_allocators +import gt4py.next.iterator.ir as itir +import gt4py.next.program_processors.processor_interface as ppi from gt4py.next.otf import languages, recipes, stages, workflow -from 
gt4py.next.program_processors import processor_interface as ppi SrcL = TypeVar("SrcL", bound=languages.NanobindSrcL) @@ -54,3 +58,26 @@ def __call__(self, program: itir.FencilDefinition, *args, **kwargs: Any) -> None @property def __name__(self) -> str: return self.name or repr(self) + + +@dataclasses.dataclass(frozen=True) +class OTFBackend(Generic[core_defs.DeviceTypeT]): + executor: ppi.ProgramExecutor + allocator: next_allocators.FieldBufferAllocatorProtocol[core_defs.DeviceTypeT] + + def __call__(self, program: itir.FencilDefinition, *args, **kwargs: Any) -> None: + self.executor.__call__(program, *args, **kwargs) + + @property + def __name__(self) -> str: + return getattr(self.executor, "__name__", None) or repr(self) + + @property + def kind(self) -> type[ppi.ProgramExecutor]: + return self.executor.kind + + @property + def __gt_allocator__( + self, + ) -> next_allocators.FieldBufferAllocatorProtocol[core_defs.DeviceTypeT]: + return self.allocator diff --git a/src/gt4py/next/program_processors/processor_interface.py b/src/gt4py/next/program_processors/processor_interface.py index b39438937e..d9f8b36301 100644 --- a/src/gt4py/next/program_processors/processor_interface.py +++ b/src/gt4py/next/program_processors/processor_interface.py @@ -26,21 +26,25 @@ """ from __future__ import annotations -from typing import Callable, Protocol, TypeGuard, TypeVar, cast +import functools +from collections.abc import Sequence +from typing import Any, Callable, Literal, Optional, Protocol, TypeGuard, TypeVar, cast -from gt4py.next.iterator import ir as itir +import gt4py._core.definitions as core_defs +import gt4py.next.allocators as next_allocators +import gt4py.next.iterator.ir as itir OutputT = TypeVar("OutputT", covariant=True) ProcessorKindT = TypeVar("ProcessorKindT", bound="ProgramProcessor", covariant=True) -class ProgramProcessorFunction(Protocol[OutputT]): +class ProgramProcessorCallable(Protocol[OutputT]): def __call__(self, program: itir.FencilDefinition, *args, **kwargs) -> OutputT: ... -class ProgramProcessor(ProgramProcessorFunction[OutputT], Protocol[OutputT, ProcessorKindT]): +class ProgramProcessor(ProgramProcessorCallable[OutputT], Protocol[OutputT, ProcessorKindT]): @property def kind(self) -> type[ProcessorKindT]: ... @@ -52,46 +56,133 @@ def kind(self) -> type[ProgramFormatter]: return ProgramFormatter -def program_formatter(func: ProgramProcessorFunction[str]) -> ProgramFormatter: +def make_program_processor( + func: ProgramProcessorCallable[OutputT], + kind: type[ProcessorKindT], + *, + name: Optional[str] = None, + accept_args: None | int | Literal["all"] = "all", + accept_kwargs: None | Sequence[str] | Literal["all"] = "all", +) -> ProgramProcessor[OutputT, ProcessorKindT]: + """ + Create a program processor from a callable function. + + Args: + func: The callable function to be wrapped as a program processor. + kind: The type of the processor. + name: The name of the processor. + accept_args: The number of positional arguments to accept, or "all" to accept all. + accept_kwargs: The names of the keyword arguments to accept, or "all" to accept all. + + Returns: + A program processor that wraps the given function. + + Raises: + ValueError: If the value of `accept_args` or `accept_kwargs` is invalid. 
+ """ + args_filter: Callable[[Sequence], Sequence] + if accept_args is None: + args_filter = lambda args: () # noqa: E731 # use def instead of named lambdas + elif accept_args == "all": + args_filter = lambda args: args # noqa: E731 + elif isinstance(accept_args, int): + if accept_args < 0: + raise ValueError( + f"Number of accepted arguments cannot be a negative number ({accept_args})" + ) + args_filter = lambda args: args[:accept_args] # type: ignore[misc] # noqa: E731 + else: + raise ValueError(f"Invalid ({accept_args}) accept_args value") + + filtered_kwargs: Callable[[dict[str, Any]], dict[str, Any]] + if accept_kwargs is None: + filtered_kwargs = lambda kwargs: {} # noqa: E731 # use def instead of named lambdas + elif accept_kwargs == "all": # don't swap with 'isinstance(..., Sequence)' + filtered_kwargs = lambda kwargs: kwargs # noqa: E731 + elif isinstance(accept_kwargs, Sequence): + if not all(isinstance(a, str) for a in accept_kwargs): + raise ValueError(f"Provided invalid list of keyword argument names ({accept_args})") + filtered_kwargs = lambda kwargs: { # noqa: E731 + key: value for key, value in kwargs.items() if key in accept_kwargs # type: ignore[operator] # key in accept_kwargs + } + else: + raise ValueError(f"Invalid ({accept_kwargs}) 'accept_kwargs' value") + + @functools.wraps(func) + def _wrapper(program: itir.FencilDefinition, *args, **kwargs) -> OutputT: + return func(program, *args_filter(args), **filtered_kwargs(kwargs)) + + if name is not None: + _wrapper.__name__ = name + + # this operation effectively changes the type of the returned object, + # which is the intention here + _wrapper.kind = kind # type: ignore[attr-defined] + + return cast(ProgramProcessor[OutputT, ProcessorKindT], _wrapper) + + +def program_formatter( + func: ProgramProcessorCallable[str], + *, + name: Optional[str] = None, + accept_args: None | int | Literal["all"] = "all", + accept_kwargs: Sequence[str] | None | Literal["all"] = "all", +) -> ProgramFormatter: """ Turn a function that formats a program as a string into a ProgramFormatter. Examples: - --------- - >>> @program_formatter - ... def format_foo(fencil: itir.FencilDefinition, *args, **kwargs) -> str: - ... '''A very useless fencil formatter.''' - ... return "foo" + >>> @program_formatter + ... def format_foo(fencil: itir.FencilDefinition, *args, **kwargs) -> str: + ... '''A very useless fencil formatter.''' + ... 
return "foo" - >>> ensure_processor_kind(format_foo, ProgramFormatter) + >>> ensure_processor_kind(format_foo, ProgramFormatter) """ - # this operation effectively changes the type of func and that is the intention here - func.kind = ProgramFormatter # type: ignore[attr-defined] - return cast(ProgramProcessor[str, ProgramFormatter], func) + return make_program_processor( + func, + ProgramFormatter, # type: ignore[type-abstract] # ProgramFormatter is abstract + name=name, + accept_args=accept_args, + accept_kwargs=accept_kwargs, + ) -class ProgramExecutor(ProgramProcessor[None, "ProgramExecutor"], Protocol): +class ProgramExecutor(ProgramProcessor[None, "ProgramExecutor"]): @property def kind(self) -> type[ProgramExecutor]: return ProgramExecutor -def program_executor(func: ProgramProcessorFunction[None]) -> ProgramExecutor: +def program_executor( + func: ProgramProcessorCallable[None], + *, + name: Optional[str] = None, + accept_args: None | int | Literal["all"] = "all", + accept_kwargs: Sequence[str] | None | Literal["all"] = "all", +) -> ProgramExecutor: """ Turn a function that executes a program into a ``ProgramExecutor``. Examples: - --------- - >>> @program_executor - ... def badly_execute(fencil: itir.FencilDefinition, *args, **kwargs) -> None: - ... '''A useless and incorrect fencil executor.''' - ... pass + >>> @program_executor + ... def badly_execute(fencil: itir.FencilDefinition, *args, **kwargs) -> None: + ... '''A useless and incorrect fencil executor.''' + ... pass - >>> ensure_processor_kind(badly_execute, ProgramExecutor) + >>> ensure_processor_kind(badly_execute, ProgramExecutor) """ - # this operation effectively changes the type of func and that is the intention here - func.kind = ProgramExecutor # type: ignore[attr-defined] - return cast(ProgramExecutor, func) + return cast( + ProgramExecutor, + make_program_processor( + func, + ProgramExecutor, + name=name, + accept_args=accept_args, + accept_kwargs=accept_kwargs, + ), + ) def is_processor_kind( @@ -105,3 +196,25 @@ def ensure_processor_kind( ) -> None: if not is_processor_kind(obj, kind): raise TypeError(f"{obj} is not a {kind.__name__}!") + + +class ProgramBackend( + ProgramProcessor[None, "ProgramExecutor"], + next_allocators.FieldBufferAllocatorFactoryProtocol[core_defs.DeviceTypeT], + Protocol[core_defs.DeviceTypeT], +): + ... 
+ + +def is_program_backend(obj: Callable) -> TypeGuard[ProgramBackend]: + return is_processor_kind( + obj, ProgramExecutor # type: ignore[type-abstract] # ProgramExecutor is abstract + ) and next_allocators.is_field_allocator_factory(obj) + + +def is_program_backend_for( + obj: Callable, device: core_defs.DeviceTypeT +) -> TypeGuard[ProgramBackend[core_defs.DeviceTypeT]]: + return is_processor_kind( + obj, ProgramExecutor # type: ignore[type-abstract] # ProgramExecutor is abstract + ) and next_allocators.is_field_allocator_factory_for(obj, device) diff --git a/src/gt4py/next/program_processors/runners/dace_iterator/__init__.py b/src/gt4py/next/program_processors/runners/dace_iterator/__init__.py index be63d6809d..9f67cb26da 100644 --- a/src/gt4py/next/program_processors/runners/dace_iterator/__init__.py +++ b/src/gt4py/next/program_processors/runners/dace_iterator/__init__.py @@ -19,7 +19,9 @@ from dace.codegen.compiled_sdfg import CompiledSDFG from dace.transformation.auto import auto_optimize as autoopt +import gt4py.next.allocators as next_allocators import gt4py.next.iterator.ir as itir +import gt4py.next.program_processors.otf_compile_executor as otf_exec from gt4py.next.common import Dimension, Domain, UnitRange, is_field from gt4py.next.iterator.embedded import NeighborTableOffsetProvider, StridedNeighborOffsetProvider from gt4py.next.iterator.transforms import LiftMode, apply_common_transforms @@ -235,22 +237,43 @@ def run_dace_iterator(program: itir.FencilDefinition, *args, **kwargs) -> None: @program_executor -def run_dace(program: itir.FencilDefinition, *args, **kwargs) -> None: - run_on_gpu = any(not isinstance(arg.ndarray, np.ndarray) for arg in args if is_field(arg)) - if run_on_gpu: - if cp is None: - raise RuntimeError( - f"Non-numpy field argument passed to program {program.id} but module cupy not installed" - ) - - if not all(isinstance(arg.ndarray, cp.ndarray) for arg in args if is_field(arg)): - raise RuntimeError("Execution on GPU requires all fields to be stored as cupy arrays") - +def _run_dace_cpu(program: itir.FencilDefinition, *args, **kwargs) -> None: run_dace_iterator( program, *args, **kwargs, - build_cache=_build_cache_gpu if run_on_gpu else _build_cache_cpu, + build_cache=_build_cache_cpu, build_type=_build_type, - run_on_gpu=run_on_gpu, + run_on_gpu=False, ) + + +run_dace_cpu = otf_exec.OTFBackend( + executor=_run_dace_cpu, + allocator=next_allocators.StandardCPUFieldBufferAllocator(), +) + +if cp: + + @program_executor + def _run_dace_gpu(program: itir.FencilDefinition, *args, **kwargs) -> None: + run_dace_iterator( + program, + *args, + **kwargs, + build_cache=_build_cache_gpu, + build_type=_build_type, + run_on_gpu=True, + ) + +else: + + @program_executor + def _run_dace_gpu(program: itir.FencilDefinition, *args, **kwargs) -> None: + raise RuntimeError("Missing `cupy` dependency for GPU execution.") + + +run_dace_gpu = otf_exec.OTFBackend( + executor=_run_dace_gpu, + allocator=next_allocators.StandardGPUFieldBufferAllocator(), +) diff --git a/src/gt4py/next/program_processors/runners/double_roundtrip.py b/src/gt4py/next/program_processors/runners/double_roundtrip.py index 651fb43fa7..2f06d17c7f 100644 --- a/src/gt4py/next/program_processors/runners/double_roundtrip.py +++ b/src/gt4py/next/program_processors/runners/double_roundtrip.py @@ -12,13 +12,25 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -from typing import Any +from __future__ import annotations -from gt4py.next.iterator import ir as itir -from 
gt4py.next.program_processors.processor_interface import program_executor -from gt4py.next.program_processors.runners import roundtrip +from typing import TYPE_CHECKING, Any +import gt4py.next.program_processors.otf_compile_executor as otf_compile_executor +import gt4py.next.program_processors.processor_interface as ppi +import gt4py.next.program_processors.runners.roundtrip as roundtrip -@program_executor + +if TYPE_CHECKING: + import gt4py.next.iterator.ir as itir + + +@ppi.program_executor def executor(program: itir.FencilDefinition, *args: Any, **kwargs: Any) -> None: - roundtrip.executor(program, *args, dispatch_backend=roundtrip.executor, **kwargs) + roundtrip.execute_roundtrip(program, *args, dispatch_backend=roundtrip.executor, **kwargs) + + +backend = otf_compile_executor.OTFBackend( + executor=executor, + allocator=roundtrip.backend.allocator, +) diff --git a/src/gt4py/next/program_processors/runners/gtfn.py b/src/gt4py/next/program_processors/runners/gtfn.py index 35c10fe353..7233e7a893 100644 --- a/src/gt4py/next/program_processors/runners/gtfn.py +++ b/src/gt4py/next/program_processors/runners/gtfn.py @@ -16,7 +16,8 @@ import numpy.typing as npt -from gt4py._core import definitions as core_defs +import gt4py._core.definitions as core_defs +import gt4py.next.allocators as next_allocators from gt4py.eve.utils import content_hash from gt4py.next import common from gt4py.next.iterator.transforms import LiftMode @@ -129,29 +130,66 @@ def compilation_hash(otf_closure: stages.ProgramCall) -> int: ) -run_gtfn = otf_compile_executor.OTFCompileExecutor( +gtfn_executor = otf_compile_executor.OTFCompileExecutor( name="run_gtfn", otf_workflow=GTFN_DEFAULT_WORKFLOW ) +run_gtfn = otf_compile_executor.OTFBackend( + executor=gtfn_executor, + allocator=next_allocators.StandardCPUFieldBufferAllocator(), +) -run_gtfn_imperative = otf_compile_executor.OTFCompileExecutor( +gtfn_imperative_executor = otf_compile_executor.OTFCompileExecutor( name="run_gtfn_imperative", - otf_workflow=run_gtfn.otf_workflow.replace( - translation=run_gtfn.otf_workflow.translation.replace(use_imperative_backend=True), + otf_workflow=gtfn_executor.otf_workflow.replace( + translation=gtfn_executor.otf_workflow.translation.replace(use_imperative_backend=True), ), ) +run_gtfn_imperative = otf_compile_executor.OTFBackend( + executor=gtfn_imperative_executor, + allocator=next_allocators.StandardCPUFieldBufferAllocator(), +) -run_gtfn_cached = otf_compile_executor.CachedOTFCompileExecutor( +# TODO(ricoh): add API for converting an executor to a cached version of itself and vice versa +gtfn_cached_executor = otf_compile_executor.CachedOTFCompileExecutor( name="run_gtfn_cached", - otf_workflow=workflow.CachedStep(step=run_gtfn.otf_workflow, hash_function=compilation_hash), -) # todo(ricoh): add API for converting an executor to a cached version of itself and vice versa + otf_workflow=workflow.CachedStep( + step=gtfn_executor.otf_workflow, hash_function=compilation_hash + ), +) +run_gtfn_cached = otf_compile_executor.OTFBackend( + executor=gtfn_cached_executor, + allocator=next_allocators.StandardCPUFieldBufferAllocator(), +) + -run_gtfn_gpu = otf_compile_executor.OTFCompileExecutor( +run_gtfn_with_temporaries = otf_compile_executor.OTFBackend( + executor=otf_compile_executor.OTFCompileExecutor( + name="run_gtfn_with_temporaries", + otf_workflow=gtfn_executor.otf_workflow.replace( + translation=gtfn_executor.otf_workflow.translation.replace( + lift_mode=LiftMode.FORCE_TEMPORARIES + ), + ), + ), + 
allocator=next_allocators.StandardCPUFieldBufferAllocator(), +) + +gtfn_gpu_executor = otf_compile_executor.OTFCompileExecutor( name="run_gtfn_gpu", otf_workflow=GTFN_GPU_WORKFLOW ) +run_gtfn_gpu = otf_compile_executor.OTFBackend( + executor=gtfn_gpu_executor, + allocator=next_allocators.StandardGPUFieldBufferAllocator(), +) + -run_gtfn_with_temporaries = otf_compile_executor.OTFCompileExecutor( - name="run_gtfn_with_temporaries", - otf_workflow=run_gtfn.otf_workflow.replace( - translation=run_gtfn.otf_workflow.translation.replace(lift_mode=LiftMode.FORCE_TEMPORARIES), +gtfn_gpu_cached_executor = otf_compile_executor.CachedOTFCompileExecutor( + name="run_gtfn_gpu_cached", + otf_workflow=workflow.CachedStep( + step=gtfn_gpu_executor.otf_workflow, hash_function=compilation_hash ), ) +run_gtfn_gpu_cached = otf_compile_executor.OTFBackend( + executor=gtfn_gpu_cached_executor, + allocator=next_allocators.StandardGPUFieldBufferAllocator(), +) diff --git a/src/gt4py/next/program_processors/runners/roundtrip.py b/src/gt4py/next/program_processors/runners/roundtrip.py index 3560384eb4..f81606eec0 100644 --- a/src/gt4py/next/program_processors/runners/roundtrip.py +++ b/src/gt4py/next/program_processors/runners/roundtrip.py @@ -21,20 +21,25 @@ from collections.abc import Callable, Iterable from typing import Any, Optional -from gt4py.eve import codegen +import gt4py.eve.codegen as codegen +import gt4py.next.allocators as next_allocators +import gt4py.next.common as common +import gt4py.next.iterator.embedded as embedded +import gt4py.next.iterator.ir as itir +import gt4py.next.iterator.transforms as itir_transforms +import gt4py.next.iterator.transforms.global_tmps as gtmps_transform +import gt4py.next.program_processors.otf_compile_executor as otf_compile_executor +import gt4py.next.program_processors.processor_interface as ppi from gt4py.eve.codegen import FormatTemplate as as_fmt, MakoTemplate as as_mako -from gt4py.next import common -from gt4py.next.iterator import embedded, ir as itir -from gt4py.next.iterator.transforms import LiftMode, apply_common_transforms -from gt4py.next.iterator.transforms.global_tmps import FencilWithTemporaries -from gt4py.next.program_processors.processor_interface import program_executor def _create_tmp(axes, origin, shape, dtype): if isinstance(dtype, tuple): return f"({','.join(_create_tmp(axes, origin, shape, dt) for dt in dtype)},)" else: - return f"gtx.np_as_located_field({axes}, origin={origin})(np.empty({shape}, dtype=np.dtype('{dtype}')))" + return ( + f"gtx.as_field([{axes}], np.empty({shape}, dtype=np.dtype('{dtype}')), origin={origin})" + ) class EmbeddedDSL(codegen.TemplatedGenerator): @@ -103,7 +108,7 @@ def visit_Temporary(self, node, **kwargs): def fencil_generator( ir: itir.Node, debug: bool, - lift_mode: LiftMode, + lift_mode: itir_transforms.LiftMode, use_embedded: bool, offset_provider: dict[str, embedded.NeighborTableOffsetProvider], ) -> Callable: @@ -125,7 +130,9 @@ def fencil_generator( if cache_key in _FENCIL_CACHE: return _FENCIL_CACHE[cache_key] - ir = apply_common_transforms(ir, lift_mode=lift_mode, offset_provider=offset_provider) + ir = itir_transforms.apply_common_transforms( + ir, lift_mode=lift_mode, offset_provider=offset_provider + ) program = EmbeddedDSL.apply(ir) @@ -180,8 +187,12 @@ def fencil_generator( if not debug: pathlib.Path(source_file_name).unlink(missing_ok=True) - assert isinstance(ir, (itir.FencilDefinition, FencilWithTemporaries)) - fencil_name = ir.fencil.id + "_wrapper" if isinstance(ir, FencilWithTemporaries) else 
ir.id + assert isinstance(ir, (itir.FencilDefinition, gtmps_transform.FencilWithTemporaries)) + fencil_name = ( + ir.fencil.id + "_wrapper" + if isinstance(ir, gtmps_transform.FencilWithTemporaries) + else ir.id + ) fencil = getattr(mod, fencil_name) _FENCIL_CACHE[cache_key] = fencil @@ -195,8 +206,8 @@ def execute_roundtrip( column_axis: Optional[common.Dimension] = None, offset_provider: dict[str, embedded.NeighborTableOffsetProvider], debug: bool = False, - lift_mode: LiftMode = LiftMode.FORCE_INLINE, - dispatch_backend: Optional[str] = None, + lift_mode: itir_transforms.LiftMode = itir_transforms.LiftMode.FORCE_INLINE, + dispatch_backend: Optional[ppi.ProgramExecutor] = None, ) -> None: fencil = fencil_generator( ir, @@ -216,6 +227,8 @@ def execute_roundtrip( return fencil(*args, **new_kwargs) -@program_executor -def executor(program: itir.FencilDefinition, *args, **kwargs) -> None: - execute_roundtrip(program, *args, **kwargs) +executor = ppi.program_executor(execute_roundtrip) # type: ignore[arg-type] + +backend = otf_compile_executor.OTFBackend( + executor=executor, allocator=next_allocators.StandardCPUFieldBufferAllocator() +) diff --git a/src/gt4py/storage/allocators.py b/src/gt4py/storage/allocators.py index adc45efaff..061f79f146 100644 --- a/src/gt4py/storage/allocators.py +++ b/src/gt4py/storage/allocators.py @@ -22,6 +22,7 @@ import operator import numpy as np +import numpy.typing as npt from gt4py._core import definitions as core_defs from gt4py.eve import extended_typing as xtyping @@ -34,11 +35,10 @@ Protocol, Sequence, Tuple, + Type, TypeAlias, TypeGuard, - TypeVar, Union, - cast, ) @@ -48,17 +48,16 @@ cp = None -_ScalarT = TypeVar("_ScalarT", bound=core_defs.Scalar) - - _NDBuffer: TypeAlias = Union[ + # xtyping.Buffer, # TODO: add once we update typing_extensions xtyping.ArrayInterface, xtyping.CUDAArrayInterface, xtyping.DLPackBuffer, ] - -#: Tuple of positive integers encoding a permutation of the dimensions. +#: Tuple of positive integers encoding a permutation of the dimensions, such that +#: layout_map[i] = j means that the i-th dimension of the tensor corresponds +#: to the j-th dimension in the (C-layout) buffer. BufferLayoutMap = NewType("BufferLayoutMap", Sequence[core_defs.PositiveIntegral]) @@ -72,7 +71,7 @@ def is_valid_layout_map(value: Sequence[Any]) -> TypeGuard[BufferLayoutMap]: @dataclasses.dataclass(frozen=True) -class TensorBuffer(Generic[core_defs.NDArrayObjectT, _ScalarT]): +class TensorBuffer(Generic[core_defs.DeviceTypeT, core_defs.ScalarT]): """ N-dimensional (tensor-like) memory buffer. @@ -88,9 +87,9 @@ class TensorBuffer(Generic[core_defs.NDArrayObjectT, _ScalarT]): dtype: Data type descriptor. shape: Tuple with lengths of the corresponding tensor dimensions. strides: Tuple with sizes (in bytes) of the steps in each dimension. - layout_map: Tuple with the order of the dimensions in the buffer. + layout_map: Tuple with the order of the dimensions in the buffer layout_map[i] = j means that the i-th dimension of the tensor - corresponds to the j-th dimension in the buffer. + corresponds to the j-th dimension in the (C-layout) buffer. byte_offset: Offset (in bytes) from the beginning of the buffer to the first valid element. byte_alignment: Alignment (in bytes) of the first valid element. 
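The `layout_map` semantics documented in the hunk above can be made concrete with plain NumPy. An illustrative sketch, independent of the allocator classes (all names are local to the example):

```python
import numpy as np

# layout_map[i] = j: tensor dimension i is stored as the j-th dimension of the
# C-layout buffer, so j == ndim - 1 is the contiguous (fastest-varying) one.
shape = (2, 3, 4)       # logical tensor shape (dims 0, 1, 2)
layout_map = (2, 0, 1)  # dim 0 contiguous, dim 1 slowest, dim 2 in between

# Allocate a C-contiguous buffer with its dimensions permuted accordingly ...
buffer_shape = tuple(shape[layout_map.index(j)] for j in range(len(shape)))
buffer = np.empty(buffer_shape)  # shape (3, 4, 2)

# ... and view it as the logical tensor by undoing the permutation.
tensor = buffer.transpose(layout_map)
assert tensor.shape == shape
assert np.argsort(tensor.strides).tolist() == [0, 2, 1]  # dim 0: smallest stride
```
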
@@ -100,37 +99,45 @@ class TensorBuffer(Generic[core_defs.NDArrayObjectT, _ScalarT]): buffer: _NDBuffer = dataclasses.field(hash=False) memory_address: int - device: core_defs.Device - dtype: core_defs.DType[_ScalarT] + device: core_defs.Device[core_defs.DeviceTypeT] + dtype: core_defs.DType[core_defs.ScalarT] shape: core_defs.TensorShape strides: Tuple[int, ...] layout_map: BufferLayoutMap byte_offset: int byte_alignment: int aligned_index: Tuple[int, ...] - ndarray: core_defs.NDArrayObjectT = dataclasses.field(hash=False) + ndarray: core_defs.NDArrayObject = dataclasses.field(hash=False) @property def ndim(self): """Order of the tensor (`len(tensor_buffer.shape)`).""" return len(self.shape) - def __array__(self, dtype: Optional[np.dtype] = None) -> np.ndarray: - if not hasattr(self.ndarray, "__array__"): + def __array__(self, dtype: Optional[npt.DTypeLike] = None, /) -> np.ndarray: + if not xtyping.supports_array(self.ndarray): raise TypeError("Cannot export tensor buffer as NumPy array.") - return self.ndarray.__array__(dtype=dtype) # type: ignore[call-overload] # TODO(egparades): figure out the mypy fix + return self.ndarray.__array__(dtype) + + @property + def __array_interface__(self) -> dict[str, Any]: + if not xtyping.supports_array_interface(self.ndarray): + raise TypeError("Cannot export tensor buffer to NumPy array interface.") + + return self.ndarray.__array_interface__ @property - def __cuda_array_interface__(self) -> xtyping.CUDAArrayInterfaceTypedDict: - if not hasattr(self.ndarray, "__cuda_array_interface__"): + def __cuda_array_interface__(self) -> dict[str, Any]: + if not xtyping.supports_cuda_array_interface(self.ndarray): raise TypeError("Cannot export tensor buffer to CUDA array interface.") + return self.ndarray.__cuda_array_interface__ - def __dlpack__(self) -> xtyping.PyCapsule: + def __dlpack__(self, *, stream: Optional[int] = None) -> Any: if not hasattr(self.ndarray, "__dlpack__"): raise TypeError("Cannot export tensor buffer to DLPack.") - return self.ndarray.__dlpack__() + return self.ndarray.__dlpack__(stream=stream) # type: ignore[call-arg,arg-type] # stream is not always supported def __dlpack_device__(self) -> xtyping.DLPackDevice: if not hasattr(self.ndarray, "__dlpack_device__"): @@ -138,32 +145,39 @@ def __dlpack_device__(self) -> xtyping.DLPackDevice: return self.ndarray.__dlpack_device__() -class BufferAllocator(Protocol[core_defs.NDArrayObjectT]): +if TYPE_CHECKING: + # TensorBuffer should be compatible with all the expected buffer interfaces + __TensorBufferAsArrayInterfaceT: Type[xtyping.ArrayInterface] = TensorBuffer + __TensorBufferAsCUDAArrayInterfaceT: Type[xtyping.CUDAArrayInterface] = TensorBuffer + __TensorBufferAsDLPackBufferT: Type[xtyping.DLPackBuffer] = TensorBuffer + + +class BufferAllocator(Protocol[core_defs.DeviceTypeT]): """Protocol for buffer allocators.""" @property - def device_type(self) -> core_defs.DeviceType: + def device_type(self) -> core_defs.DeviceTypeT: ... def allocate( self, shape: Sequence[core_defs.IntegralScalar], - dtype: core_defs.DType[_ScalarT], + dtype: core_defs.DType[core_defs.ScalarT], + device_id: int, layout_map: BufferLayoutMap, - device: core_defs.Device, byte_alignment: int, aligned_index: Optional[Sequence[int]] = None, - ) -> TensorBuffer[core_defs.NDArrayObjectT, _ScalarT]: + ) -> TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]: """ Allocate a TensorBuffer with the given shape, layout and alignment settings. Args: - device: Device where the buffer is allocated. - dtype: Data type descriptor. 
shape: Tensor dimensions. - layout_map: layout of the dimensions in the buffer. - layout_map[i] = j means that the i-th dimension of the tensor - corresponds to the j-th dimension of the buffer. + dtype: Data type descriptor. + layout_map: layout of the dimensions in a buffer with C-layout (contiguous dimension is last). + layout_map[i] = j means that the i-th dimension of the tensor + corresponds to the j-th dimension of the buffer. + device_id: Id of the device of `device_type` where the buffer is allocated. byte_alignment: Alignment (in bytes) of the first valid element. aligned_index: N-dimensional index of the first aligned element. """ @@ -171,18 +185,23 @@ def allocate( @dataclasses.dataclass(frozen=True, init=False) -class _BaseNDArrayBufferAllocator(abc.ABC, Generic[core_defs.NDArrayObjectT]): +class _BaseNDArrayBufferAllocator(abc.ABC, Generic[core_defs.DeviceTypeT]): """Base class for buffer allocators using NumPy-like modules.""" + @property + @abc.abstractmethod + def device_type(self) -> core_defs.DeviceTypeT: + pass + def allocate( self, shape: Sequence[core_defs.IntegralScalar], - dtype: core_defs.DType[_ScalarT], + dtype: core_defs.DType[core_defs.ScalarT], + device_id: int, layout_map: BufferLayoutMap, - device: core_defs.Device, byte_alignment: int, aligned_index: Optional[Sequence[int]] = None, - ) -> TensorBuffer[core_defs.NDArrayObjectT, _ScalarT]: + ) -> TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]: if not core_defs.is_valid_tensor_shape(shape): raise ValueError(f"Invalid shape {shape}") ndim = len(shape) @@ -221,7 +240,7 @@ def allocate( strides = tuple(strides_lst) # Allocate total size - buffer = self.raw_alloc(total_length, device) + buffer = self.malloc(total_length, device_id) memory_address = self.array_ns.byte_bounds(buffer)[0] # Compute final byte offset to align the requested buffer index @@ -247,7 +266,7 @@ def allocate( buffer, dtype, shape, padded_shape, item_size, strides, byte_offset ) - if device.device_type == core_defs.DeviceType.ROCM: + if self.device_type == core_defs.DeviceType.ROCM: # until we can rely on dlpack ndarray.__hip_array_interface__ = { # type: ignore[attr-defined] "shape": ndarray.shape, # type: ignore[union-attr] @@ -262,7 +281,7 @@ def allocate( return TensorBuffer( buffer=buffer, memory_address=memory_address, - device=device, + device=core_defs.Device(self.device_type, device_id), dtype=dtype, shape=shape, strides=strides, @@ -275,77 +294,101 @@ def allocate( @property @abc.abstractmethod - def array_ns(self) -> _NumPyLikeNamespace[core_defs.NDArrayObjectT]: + def array_ns(self) -> ValidNumPyLikeAllocationNS: pass @abc.abstractmethod - def raw_alloc(self, length: int, device: core_defs.Device) -> _NDBuffer: + def malloc(self, length: int, device_id: int) -> _NDBuffer: pass @abc.abstractmethod def tensorize( self, buffer: _NDBuffer, - dtype: core_defs.DType[_ScalarT], + dtype: core_defs.DType[core_defs.ScalarT], shape: core_defs.TensorShape, allocated_shape: core_defs.TensorShape, item_size: int, strides: Sequence[int], byte_offset: int, - ) -> core_defs.NDArrayObjectT: + ) -> core_defs.NDArrayObject: pass -if TYPE_CHECKING: +class ValidNumPyLikeAllocationNS(Protocol): + class _NumPyLibModule(Protocol): + class _NumPyLibStridesModule(Protocol): + @staticmethod + def as_strided( + ndarray: core_defs.NDArrayObject, **kwargs: Any + ) -> core_defs.NDArrayObject: + ... + + stride_tricks: _NumPyLibStridesModule + + lib: _NumPyLibModule + + @staticmethod + def empty(shape: Tuple[int, ...], dtype: Any) -> _NDBuffer: + ... 
-    class _NumPyLikeNamespace(Protocol[core_defs.NDArrayObjectT]):
-        class _NumPyLibModule(Protocol):
-            class _NumPyLibStridesModule(Protocol):
-                def as_strided(
-                    self, ndarray: core_defs.NDArrayObjectT, **kwargs: Any
-                ) -> core_defs.NDArrayObjectT:
-                    ...
+    @staticmethod
+    def byte_bounds(ndarray: _NDBuffer) -> Tuple[int, int]:
-            stride_tricks: _NumPyLibStridesModule
+        ...
-        lib: _NumPyLibModule
+def is_valid_nplike_allocation_ns(obj: Any) -> TypeGuard[ValidNumPyLikeAllocationNS]:
+    return (
+        len(required_keys := {"empty", "byte_bounds", "lib"}) == len(required_keys & set(dir(obj)))
+        and "stride_tricks" in dir(obj.lib)
+        and "as_strided" in dir(obj.lib.stride_tricks)
+    )
-        def empty(self, shape: core_defs.TensorShape, dtype: np.dtype) -> core_defs.NDArrayObjectT:
-            ...
-        def byte_bounds(self, ndarray: _NDBuffer) -> tuple[int, int]:
-            ...
+if not TYPE_CHECKING:
+    is_valid_nplike_allocation_ns = functools.lru_cache(maxsize=None)(is_valid_nplike_allocation_ns)
-@dataclasses.dataclass(frozen=True)
-class NumPyLikeArrayBufferAllocator(_BaseNDArrayBufferAllocator[core_defs.NDArrayObjectT]):
-    device_type: core_defs.DeviceType
-    array_ns_ref: _NumPyLikeNamespace[core_defs.NDArrayObjectT]
+@dataclasses.dataclass(frozen=True, init=False)
+class NDArrayBufferAllocator(_BaseNDArrayBufferAllocator[core_defs.DeviceTypeT]):
+    _device_type: core_defs.DeviceTypeT
+    _array_ns: ValidNumPyLikeAllocationNS
+
+    def __init__(
+        self,
+        device_type: core_defs.DeviceTypeT,
+        array_ns: ValidNumPyLikeAllocationNS,
+    ):
+        object.__setattr__(self, "_device_type", device_type)
+        object.__setattr__(self, "_array_ns", array_ns)
+
+    @property
+    def device_type(self) -> core_defs.DeviceTypeT:
+        return self._device_type
 
     @property
-    def array_ns(self) -> _NumPyLikeNamespace[core_defs.NDArrayObjectT]:
-        return self.array_ns_ref
+    def array_ns(self) -> ValidNumPyLikeAllocationNS:
+        return self._array_ns
 
-    def raw_alloc(self, length: int, device: core_defs.Device) -> _NDBuffer:
-        if device.device_type != core_defs.DeviceType.CPU and device.device_id != 0:
-            raise ValueError(f"Unsupported device {device} for memory allocation")
+    def malloc(self, length: int, device_id: int) -> _NDBuffer:
+        if self.device_type == core_defs.DeviceType.CPU and device_id != 0:
+            raise ValueError(f"Unsupported device ID {device_id} for CPU memory allocation")
         shape = (length,)
         assert core_defs.is_valid_tensor_shape(shape)  # for mypy
-        return cast(
-            _NDBuffer, self.array_ns.empty(shape=shape, dtype=np.dtype(np.uint8))
-        )  # TODO(havogt): figure out how we type this properly
+        out = self.array_ns.empty(shape=tuple(shape), dtype=np.dtype(np.uint8))
+        return out
 
     def tensorize(
         self,
         buffer: _NDBuffer,
-        dtype: core_defs.DType[_ScalarT],
+        dtype: core_defs.DType[core_defs.ScalarT],
         shape: core_defs.TensorShape,
         allocated_shape: core_defs.TensorShape,
         item_size: int,
         strides: Sequence[int],
         byte_offset: int,
-    ) -> core_defs.NDArrayObjectT:
+    ) -> core_defs.NDArrayObject:
         aligned_buffer = buffer[byte_offset : byte_offset + math.prod(allocated_shape) * item_size]  # type: ignore[index] # TODO(egparedes): should we extend `_NDBuffer`s to cover __getitem__?
         flat_ndarray = aligned_buffer.view(dtype=np.dtype(dtype))
         tensor_view = self.array_ns.lib.stride_tricks.as_strided(
@@ -356,53 +399,3 @@ def tensorize(
         tensor_view = tensor_view[shape_slices]
         return tensor_view
-
-
-#: Registry of allocators for each device type.
-device_allocators: dict[core_defs.DeviceType, BufferAllocator] = {} - -device_allocators[core_defs.DeviceType.CPU] = NumPyLikeArrayBufferAllocator( - device_type=core_defs.DeviceType.CPU, - array_ns_ref=cast(_NumPyLikeNamespace, np) if TYPE_CHECKING else np, -) - -if cp: - device_allocators[core_defs.DeviceType.CUDA] = NumPyLikeArrayBufferAllocator( - device_type=core_defs.DeviceType.CUDA, - array_ns_ref=cp, - ) - device_allocators[core_defs.DeviceType.ROCM] = NumPyLikeArrayBufferAllocator( - device_type=core_defs.DeviceType.ROCM, - array_ns_ref=cp, - ) - - -def allocate( - shape: Sequence[core_defs.IntegralScalar], - dtype: core_defs.DType[_ScalarT], - layout_map: BufferLayoutMap, - *, - byte_alignment: int, - aligned_index: Optional[Sequence[int]] = None, - device: Optional[core_defs.Device] = None, - allocator: Optional[BufferAllocator] = None, -) -> TensorBuffer: - """Allocate a TensorBuffer with the given settings on the given device.""" - if device is None and allocator is None: - raise ValueError("No 'device' or 'allocator' specified") - if device is None: - assert allocator is not None # for mypy - device = core_defs.Device(allocator.device_type, 0) - assert device is not None # for mypy - allocator = allocator or device_allocators[device.device_type] - if device.device_type != allocator.device_type: - raise ValueError(f"Device {device} and allocator {allocator} are incompatible") - - return allocator.allocate( - shape=shape, - dtype=dtype, - layout_map=layout_map, - byte_alignment=byte_alignment, - aligned_index=aligned_index, - device=device, - ) diff --git a/src/gt4py/storage/cartesian/interface.py b/src/gt4py/storage/cartesian/interface.py index 6e19b9d771..517593dd38 100644 --- a/src/gt4py/storage/cartesian/interface.py +++ b/src/gt4py/storage/cartesian/interface.py @@ -15,7 +15,7 @@ from __future__ import annotations import numbers -from typing import Any, Optional, Protocol, Sequence, Tuple, Union +from typing import Optional, Sequence, Union import numpy as np @@ -33,20 +33,7 @@ except ImportError: dace = None -if np.lib.NumpyVersion(np.__version__) >= "1.20.0": - from numpy.typing import ArrayLike, DTypeLike -else: - ArrayLike = Any # type: ignore[misc] # assign multiple types in both branches - DTypeLike = Any # type: ignore[misc] # assign multiple types in both branches - - -# Protocols -class GTDimsInterface(Protocol): - __gt_dims__: Tuple[str, ...] - - -class GTOriginInterface(Protocol): - __gt_origin__: Tuple[int, ...] 
+from numpy.typing import ArrayLike, DTypeLike # Helper functions diff --git a/src/gt4py/storage/cartesian/utils.py b/src/gt4py/storage/cartesian/utils.py index e6060328ff..0f7cf5d0ab 100644 --- a/src/gt4py/storage/cartesian/utils.py +++ b/src/gt4py/storage/cartesian/utils.py @@ -17,7 +17,7 @@ import collections.abc import math import numbers -from typing import Any, Literal, Optional, Sequence, Tuple, Union, cast +from typing import Any, Final, Literal, Optional, Sequence, Tuple, Union, cast import numpy as np import numpy.typing as npt @@ -39,8 +39,34 @@ cp = None +CUPY_DEVICE: Final[Literal[None, core_defs.DeviceType.CUDA, core_defs.DeviceType.ROCM]] = ( + None + if not cp + else (core_defs.DeviceType.ROCM if cp.cuda.get_hipcc_path() else core_defs.DeviceType.CUDA) +) + + FieldLike = Union["cp.ndarray", np.ndarray, ArrayInterface, CUDAArrayInterface] +assert allocators.is_valid_nplike_allocation_ns(np) + +_CPUBufferAllocator = allocators.NDArrayBufferAllocator( + device_type=core_defs.DeviceType.CPU, + array_ns=np, +) + +_GPUBufferAllocator: Optional[allocators.NDArrayBufferAllocator] = None +if cp: + assert allocators.is_valid_nplike_allocation_ns(cp) + if CUPY_DEVICE == core_defs.DeviceType.CUDA: + _GPUBufferAllocator = allocators.NDArrayBufferAllocator( + device_type=core_defs.DeviceType.CUDA, array_ns=cp + ) + else: + _GPUBufferAllocator = allocators.NDArrayBufferAllocator( + device_type=core_defs.DeviceType.ROCM, array_ns=cp + ) + def _idx_from_order(order): return list(np.argsort(order)) @@ -201,15 +227,15 @@ def allocate_cpu( aligned_index: Optional[Sequence[int]], ) -> Tuple[allocators._NDBuffer, np.ndarray]: device = core_defs.Device(core_defs.DeviceType.CPU, 0) - buffer = allocators.allocate( + buffer = _CPUBufferAllocator.allocate( shape, core_defs.dtype(dtype), + device_id=device.device_id, layout_map=layout_map, - device=device, byte_alignment=alignment_bytes, aligned_index=aligned_index, ) - return buffer.buffer, buffer.ndarray + return buffer.buffer, cast(np.ndarray, buffer.ndarray) def allocate_gpu( @@ -219,15 +245,16 @@ def allocate_gpu( alignment_bytes: int, aligned_index: Optional[Sequence[int]], ) -> Tuple["cp.ndarray", "cp.ndarray"]: - device = core_defs.Device( + assert _GPUBufferAllocator is not None, "GPU allocation library or device not found" + device = core_defs.Device( # type: ignore[type-var] core_defs.DeviceType.ROCM if gt_config.GT4PY_USE_HIP else core_defs.DeviceType.CUDA, 0 ) - buffer = allocators.allocate( + buffer = _GPUBufferAllocator.allocate( shape, core_defs.dtype(dtype), + device_id=device.device_id, layout_map=layout_map, - device=device, byte_alignment=alignment_bytes, aligned_index=aligned_index, ) - return buffer.buffer, buffer.ndarray + return buffer.buffer, cast("cp.ndarray", buffer.ndarray) diff --git a/tests/eve_tests/unit_tests/test_extended_typing.py b/tests/eve_tests/unit_tests/test_extended_typing.py index da3cbbaeda..733e12577c 100644 --- a/tests/eve_tests/unit_tests/test_extended_typing.py +++ b/tests/eve_tests/unit_tests/test_extended_typing.py @@ -232,6 +232,69 @@ def test_subclass_check_with_data_members(self, sample_class_defs): assert issubclass(ConcreteClass, NoDataProto) +def test_supports_array_interface(): + from gt4py.eve.extended_typing import supports_array_interface + + class ArrayInterface: + __array_interface__ = "interface" + + class NoArrayInterface: + pass + + assert supports_array_interface(ArrayInterface()) + assert not supports_array_interface(NoArrayInterface()) + assert not supports_array_interface("array") + 
assert not supports_array_interface(None) + + +def test_supports_cuda_array_interface(): + from gt4py.eve.extended_typing import supports_cuda_array_interface + + class CudaArray: + def __cuda_array_interface__(self): + return {} + + class NoCudaArray: + pass + + assert supports_cuda_array_interface(CudaArray()) + assert not supports_cuda_array_interface(NoCudaArray()) + assert not supports_cuda_array_interface("cuda") + assert not supports_cuda_array_interface(None) + + +def test_supports_dlpack(): + from gt4py.eve.extended_typing import supports_dlpack + + class DummyDLPackBuffer: + def __dlpack__(self): + pass + + def __dlpack_device__(self): + pass + + class DLPackBufferWithWrongBufferMethod: + __dlpack__ = "buffer" + + def __dlpack_device__(self): + pass + + class DLPackBufferWithoutDevice: + def __dlpack__(self): + pass + + class DLPackBufferWithWrongDevice: + def __dlpack__(self): + pass + + __dlpack_device__ = "device" + + assert supports_dlpack(DummyDLPackBuffer()) + assert not supports_dlpack(DLPackBufferWithWrongBufferMethod()) + assert not supports_dlpack(DLPackBufferWithoutDevice()) + assert not supports_dlpack(DLPackBufferWithWrongDevice()) + + @pytest.mark.parametrize("t", (int, float, dict, tuple, frozenset, collections.abc.Mapping)) def test_is_actual_valid_type(t): assert xtyping.is_actual_type(t) diff --git a/tests/eve_tests/unit_tests/test_utils.py b/tests/eve_tests/unit_tests/test_utils.py index fda69d75d9..99513ba175 100644 --- a/tests/eve_tests/unit_tests/test_utils.py +++ b/tests/eve_tests/unit_tests/test_utils.py @@ -137,6 +137,21 @@ def unique_data_items(request): ] +def test_fluid_partial(): + from gt4py.eve.utils import fluid_partial + + def func(a, b, c): + return a + b + c + + fp1 = fluid_partial(func, 1) + fp2 = fp1.partial(2) + fp3 = fp2.partial(3) + + assert fp1(2, 3) == 6 + assert fp2(3) == 6 + assert fp3() == 6 + + def test_noninstantiable_class(): @eve.utils.noninstantiable class NonInstantiableClass(eve.datamodels.DataModel): diff --git a/tests/next_tests/__init__.py b/tests/next_tests/__init__.py index 54bc4d9c69..e2905ab49a 100644 --- a/tests/next_tests/__init__.py +++ b/tests/next_tests/__init__.py @@ -23,4 +23,8 @@ def get_processor_id(processor): module_path = processor.__module__.split(".")[-1] name = processor.__name__ return f"{module_path}.{name}" + elif hasattr(processor, "__module__") and hasattr(processor, "__class__"): + module_path = processor.__module__.split(".")[-1] + name = processor.__class__.__name__ + return f"{module_path}.{name}" return repr(processor) diff --git a/tests/next_tests/exclusion_matrices.py b/tests/next_tests/exclusion_matrices.py index 98ac9352c3..ddea04649f 100644 --- a/tests/next_tests/exclusion_matrices.py +++ b/tests/next_tests/exclusion_matrices.py @@ -11,21 +11,73 @@ # distribution for a copy of the license or check . 
# # SPDX-License-Identifier: GPL-3.0-or-later -import pytest - """Contains definition of test-exclusion matrices, see ADR 15.""" +import enum +import importlib + +import pytest + + # Skip definitions XFAIL = pytest.xfail SKIP = pytest.skip -# Processor ids as returned by next_tests.get_processor_id() -DACE = "dace_iterator.run_dace_iterator" -GTFN_CPU = "otf_compile_executor.run_gtfn" -GTFN_CPU_IMPERATIVE = "otf_compile_executor.run_gtfn_imperative" -GTFN_CPU_WITH_TEMPORARIES = "otf_compile_executor.run_gtfn_with_temporaries" -GTFN_FORMAT_SOURCECODE = "gtfn.format_sourcecode" + +# Program processors +class _PythonObjectIdMixin: + # Only useful for classes inheriting from (str, enum.Enum) + def __str__(self) -> str: + assert isinstance(self.value, str) + return self.value + + def load(self) -> object: + *mods, obj = self.value.split(".") + globs = {"_m": importlib.import_module(".".join(mods))} + obj = eval(f"_m.{obj}", globs) + return obj + + __invert__ = load + + def short_id(self, num_components: int = 2) -> str: + return ".".join(self.value.split(".")[-num_components:]) + + +class ProgramBackendId(_PythonObjectIdMixin, str, enum.Enum): + GTFN_CPU = "gt4py.next.program_processors.runners.gtfn.run_gtfn" + GTFN_CPU_IMPERATIVE = "gt4py.next.program_processors.runners.gtfn.run_gtfn_imperative" + GTFN_CPU_WITH_TEMPORARIES = ( + "gt4py.next.program_processors.runners.gtfn.run_gtfn_with_temporaries" + ) + ROUNDTRIP = "gt4py.next.program_processors.runners.roundtrip.backend" + DOUBLE_ROUNDTRIP = "gt4py.next.program_processors.runners.double_roundtrip.backend" + + +class OptionalProgramBackendId(_PythonObjectIdMixin, str, enum.Enum): + DACE_CPU = "gt4py.next.program_processors.runners.dace_iterator.run_dace_cpu" + + +class ProgramExecutorId(_PythonObjectIdMixin, str, enum.Enum): + GTFN_CPU_EXECUTOR = f"{ProgramBackendId.GTFN_CPU}.executor" + GTFN_CPU_IMPERATIVE_EXECUTOR = f"{ProgramBackendId.GTFN_CPU_IMPERATIVE}.executor" + GTFN_CPU_WITH_TEMPORARIES = f"{ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES}.executor" + ROUNDTRIP = f"{ProgramBackendId.ROUNDTRIP}.executor" + DOUBLE_ROUNDTRIP = f"{ProgramBackendId.DOUBLE_ROUNDTRIP}.executor" + + +class OptionalProgramExecutorId(_PythonObjectIdMixin, str, enum.Enum): + DACE_CPU_EXECUTOR = f"{OptionalProgramBackendId.DACE_CPU}.executor" + + +class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): + GTFN_CPP_FORMATTER = "gt4py.next.program_processors.formatters.gtfn.format_cpp" + ITIR_PRETTY_PRINTER = ( + "gt4py.next.program_processors.formatters.pretty_print.format_itir_and_check" + ) + ITIR_TYPE_CHECKER = "gt4py.next.program_processors.formatters.type_check.check_type_inference" + LISP_FORMATTER = "gt4py.next.program_processors.formatters.lisp.format_lisp" + # Test markers REQUIRES_ATLAS = "requires_atlas" @@ -66,7 +118,7 @@ #: Skip matrix, contains for each backend processor a list of tuples with following fields: #: (, ) BACKEND_SKIP_TEST_MATRIX = { - DACE: GTFN_SKIP_TEST_LIST + OptionalProgramBackendId.DACE_CPU: GTFN_SKIP_TEST_LIST + [ (USES_CAN_DEREF, XFAIL, UNSUPPORTED_MESSAGE), (USES_CONSTANT_FIELDS, XFAIL, UNSUPPORTED_MESSAGE), @@ -80,20 +132,20 @@ (USES_TUPLE_RETURNS, XFAIL, UNSUPPORTED_MESSAGE), (USES_ZERO_DIMENSIONAL_FIELDS, XFAIL, UNSUPPORTED_MESSAGE), ], - GTFN_CPU: GTFN_SKIP_TEST_LIST + ProgramBackendId.GTFN_CPU: GTFN_SKIP_TEST_LIST + [ (USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE), ], - GTFN_CPU_IMPERATIVE: GTFN_SKIP_TEST_LIST + ProgramBackendId.GTFN_CPU_IMPERATIVE: GTFN_SKIP_TEST_LIST + [ 
(USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE), ], - GTFN_CPU_WITH_TEMPORARIES: GTFN_SKIP_TEST_LIST + ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES: GTFN_SKIP_TEST_LIST + [ (USES_DYNAMIC_OFFSETS, XFAIL, UNSUPPORTED_MESSAGE), (USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE), ], - GTFN_FORMAT_SOURCECODE: [ + ProgramFormatterId.GTFN_CPP_FORMATTER: [ (USES_REDUCTION_WITH_ONLY_SPARSE_FIELDS, XFAIL, REDUCTION_WITH_ONLY_SPARSE_FIELDS_MESSAGE), ], } diff --git a/tests/next_tests/integration_tests/cases.py b/tests/next_tests/integration_tests/cases.py index ee0074e65f..634d85e64c 100644 --- a/tests/next_tests/integration_tests/cases.py +++ b/tests/next_tests/integration_tests/cases.py @@ -27,7 +27,7 @@ import gt4py.next as gtx from gt4py.eve import extended_typing as xtyping from gt4py.eve.extended_typing import Self -from gt4py.next import common +from gt4py.next import common, constructors from gt4py.next.ffront import decorator from gt4py.next.program_processors import processor_interface as ppi from gt4py.next.type_system import type_specifications as ts, type_translation @@ -129,12 +129,15 @@ def scalar_value(self) -> ScalarValue: def field( self, - backend: ppi.ProgramProcessor, + backend: ppi.ProgramExecutor, sizes: dict[gtx.Dimension, int], dtype: np.typing.DTypeLike, ) -> FieldValue: - return gtx.np_as_located_field(*sizes.keys())( - np.full(tuple(sizes.values()), self.value, dtype=dtype) + return constructors.full( + domain=common.domain(sizes), + fill_value=self.value, + dtype=dtype, + allocator=backend, ) @@ -155,7 +158,7 @@ def scalar_value(self) -> ScalarValue: def field( self, - backend: ppi.ProgramProcessor, + backend: ppi.ProgramExecutor, sizes: dict[gtx.Dimension, int], dtype: np.typing.DTypeLike, ) -> FieldValue: @@ -164,7 +167,9 @@ def field( f"`IndexInitializer` only supports fields with a single `Dimension`, got {sizes}." 
) n_data = list(sizes.values())[0] - return gtx.np_as_located_field(*sizes.keys())(np.arange(0, n_data, dtype=dtype)) + return constructors.as_field( + domain=common.domain(sizes), data=np.arange(0, n_data, dtype=dtype), allocator=backend + ) def from_case( self: Self, @@ -202,8 +207,10 @@ def field( svals = tuple(sizes.values()) n_data = int(np.prod(svals)) self.start += n_data - return gtx.np_as_located_field(*sizes.keys())( - np.arange(start, start + n_data, dtype=dtype).reshape(svals) + return constructors.as_field( + common.domain(sizes), + np.arange(start, start + n_data, dtype=dtype).reshape(svals), + allocator=backend, ) def from_case( diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py index 93296ae85f..386e64451d 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py @@ -34,6 +34,7 @@ raise e import next_tests +import next_tests.exclusion_matrices as definitions def no_backend(program: itir.FencilDefinition, *args: Any, **kwargs: Any) -> None: @@ -43,18 +44,18 @@ def no_backend(program: itir.FencilDefinition, *args: Any, **kwargs: Any) -> Non OPTIONAL_PROCESSORS = [] if dace_iterator: - OPTIONAL_PROCESSORS.append(dace_iterator.run_dace_iterator) + OPTIONAL_PROCESSORS.append(definitions.OptionalProgramBackendId.DACE_CPU) @pytest.fixture( params=[ - roundtrip.executor, - gtfn.run_gtfn, - gtfn.run_gtfn_imperative, - gtfn.run_gtfn_with_temporaries, + definitions.ProgramBackendId.ROUNDTRIP, + definitions.ProgramBackendId.GTFN_CPU, + definitions.ProgramBackendId.GTFN_CPU_IMPERATIVE, + definitions.ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES, ] + OPTIONAL_PROCESSORS, - ids=lambda p: next_tests.get_processor_id(p), + ids=lambda p: p.short_id() if p is not None else "None", ) def fieldview_backend(request): """ @@ -63,16 +64,20 @@ def fieldview_backend(request): Notes: Check ADR 15 for details on the test-exclusion matrices. 
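        The parameters are backend IDs from `next_tests.exclusion_matrices`
        (imported here as `definitions`) and are resolved lazily; roughly,
        as a sketch with an illustrative member:

            backend_id = definitions.ProgramBackendId.ROUNDTRIP
            backend = backend_id.load()  # imports the module, returns the object
            backend_id.short_id()        # "roundtrip.backend", used as the pytest id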
""" - backend = request.param - backend_id = next_tests.get_processor_id(backend) + backend_id = request.param + if backend_id is None: + backend = None + else: + backend = backend_id.load() + + for marker, skip_mark, msg in next_tests.exclusion_matrices.BACKEND_SKIP_TEST_MATRIX.get( + backend_id, [] + ): + if request.node.get_closest_marker(marker): + skip_mark(msg.format(marker=marker, backend=backend_id)) - for marker, skip_mark, msg in next_tests.exclusion_matrices.BACKEND_SKIP_TEST_MATRIX.get( - backend_id, [] - ): - if request.node.get_closest_marker(marker): - skip_mark(msg.format(marker=marker, backend=backend_id)) + backup_backend = decorator.DEFAULT_BACKEND - backup_backend = decorator.DEFAULT_BACKEND decorator.DEFAULT_BACKEND = no_backend yield backend decorator.DEFAULT_BACKEND = backup_backend @@ -203,8 +208,8 @@ def reduction_setup(): C2V=gtx.FieldOffset("C2V", source=Vertex, target=(Cell, c2vdim)), C2E=gtx.FieldOffset("C2E", source=Edge, target=(Cell, c2edim)), # inp=gtx.index_field(edge, dtype=np.int64), # TODO enable once we support gtx.index_fields in bindings - inp=gtx.np_as_located_field(Edge)(np.arange(num_edges, dtype=np.int32)), - out=gtx.np_as_located_field(Vertex)(np.zeros([num_vertices], dtype=np.int32)), + inp=gtx.as_field([Edge], np.arange(num_edges, dtype=np.int32)), + out=gtx.as_field([Vertex], np.zeros([num_vertices], dtype=np.int32)), offset_provider={ "V2E": gtx.NeighborTableOffsetProvider(v2e_arr, Vertex, Edge, 4), "E2V": gtx.NeighborTableOffsetProvider(e2v_arr, Edge, Vertex, 2, has_skip_values=False), diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py index f974e07ad8..d381a2242a 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py @@ -506,7 +506,7 @@ def testee(a: tuple[tuple[cases.IField, cases.IField], cases.IField]) -> cases.I def test_fieldop_from_scan(cartesian_case, forward): init = 1.0 expected = np.arange(init + 1.0, init + 1.0 + cartesian_case.default_sizes[IDim], 1) - out = gtx.np_as_located_field(KDim)(np.zeros((cartesian_case.default_sizes[KDim],))) + out = gtx.as_field([KDim], np.zeros((cartesian_case.default_sizes[KDim],))) if not forward: expected = np.flip(expected) @@ -637,8 +637,8 @@ def simple_scan_operator(carry: float, a: float) -> float: return carry if carry > a else carry + 1.0 k_size = cartesian_case.default_sizes[KDim] - a = gtx.np_as_located_field(KDim)(4.0 * np.ones((k_size,))) - out = gtx.np_as_located_field(KDim)(np.zeros((k_size,))) + a = gtx.as_field([KDim], 4.0 * np.ones((k_size,))) + out = gtx.as_field([KDim], np.zeros((k_size,))) cases.verify( cartesian_case, @@ -685,9 +685,9 @@ def testee(out: tuple[cases.KField, tuple[cases.KField, cases.KField]]): def test_scan_nested_tuple_input(cartesian_case): init = 1.0 k_size = cartesian_case.default_sizes[KDim] - inp1 = gtx.np_as_located_field(KDim)(np.ones((k_size,))) - inp2 = gtx.np_as_located_field(KDim)(np.arange(0.0, k_size, 1)) - out = gtx.np_as_located_field(KDim)(np.zeros((k_size,))) + inp1 = gtx.as_field([KDim], np.ones((k_size,))) + inp2 = gtx.as_field([KDim], np.arange(0.0, k_size, 1)) + out = gtx.as_field([KDim], np.zeros((k_size,))) def prev_levels_iterator(i): return range(i + 1) diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_external_local_field.py 
b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_external_local_field.py
index dbc35ddfdf..04b27c6c17 100644
--- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_external_local_field.py
+++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_external_local_field.py
@@ -35,7 +35,7 @@ def testee(
        inp * ones(V2E), axis=V2EDim
    )  # multiplication with shifted `ones` because reduction of only non-shifted field with local dimension is not supported

-    inp = gtx.np_as_located_field(Vertex, V2EDim)(unstructured_case.offset_provider["V2E"].table)
+    inp = gtx.as_field([Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table)
    ones = cases.allocate(unstructured_case, testee, "ones").strategy(cases.ConstInitializer(1))()

    cases.verify(
@@ -56,7 +56,7 @@ def test_external_local_field_only(unstructured_case):
    def testee(inp: gtx.Field[[Vertex, V2EDim], int32]) -> gtx.Field[[Vertex], int32]:
        return neighbor_sum(inp, axis=V2EDim)

-    inp = gtx.np_as_located_field(Vertex, V2EDim)(unstructured_case.offset_provider["V2E"].table)
+    inp = gtx.as_field([Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table)

    cases.verify(
        unstructured_case,
diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_gpu_backend.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_gpu_backend.py
index 381cc740c5..80e9a8e07a 100644
--- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_gpu_backend.py
+++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_gpu_backend.py
@@ -15,7 +15,6 @@ import pytest

import gt4py.next as gtx
-from gt4py.next.iterator import embedded
from gt4py.next.program_processors.runners import dace_iterator, gtfn

from next_tests.integration_tests import cases
@@ -26,9 +25,9 @@

@pytest.mark.requires_gpu
-@pytest.mark.parametrize("fieldview_backend", [dace_iterator.run_dace, gtfn.run_gtfn_gpu])
+@pytest.mark.parametrize("fieldview_backend", [dace_iterator.run_dace_gpu, gtfn.run_gtfn_gpu])
def test_copy(cartesian_case, fieldview_backend):  # noqa: F811 # fixtures
-    import cupy as cp  # TODO(ricoh): replace with storages solution when available
+    import cupy as cp

    @gtx.field_operator(backend=fieldview_backend)
    def testee(a: cases.IJKField) -> cases.IJKField:
@@ -36,8 +35,17 @@
    inp_arr = cp.full(shape=(3, 4, 5), fill_value=3, dtype=cp.int32)
    outp_arr = cp.zeros_like(inp_arr)
-    inp = embedded.np_as_located_field(cases.IDim, cases.JDim, cases.KDim)(inp_arr)
-    outp = embedded.np_as_located_field(cases.IDim, cases.JDim, cases.KDim)(outp_arr)
+    inp = gtx.as_field([cases.IDim, cases.JDim, cases.KDim], inp_arr)
+    outp = gtx.as_field([cases.IDim, cases.JDim, cases.KDim], outp_arr)

    testee(inp, out=outp, offset_provider={})
    assert cp.allclose(inp_arr, outp_arr)
+
+    # the allocation API needs a sized domain (matching `inp_arr` above)
+    sizes = {cases.IDim: 3, cases.JDim: 4, cases.KDim: 5}
+    inp_field = gtx.full(sizes, fill_value=3, dtype=cp.int32, allocator=fieldview_backend)
+    out_field = gtx.zeros(sizes, dtype=cp.int32, allocator=fieldview_backend)
+    testee(inp_field, out=out_field, offset_provider={})
+    assert cp.allclose(inp_field.ndarray, out_field.ndarray)
diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_gt4py_builtins.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_gt4py_builtins.py
index 56d5e35b3a..8213f54a45 100644
--- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_gt4py_builtins.py
+++
b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_gt4py_builtins.py @@ -267,7 +267,7 @@ def conditional_program( conditional_shifted(mask, a, b, out=out) size = cartesian_case.default_sizes[IDim] + 1 - mask = gtx.np_as_located_field(IDim)(np.random.choice(a=[False, True], size=(size))) + mask = gtx.as_field([IDim], np.random.choice(a=[False, True], size=(size))) a = cases.allocate(cartesian_case, conditional_program, "a").extend({IDim: (0, 1)})() b = cases.allocate(cartesian_case, conditional_program, "b").extend({IDim: (0, 1)})() out = cases.allocate(cartesian_case, conditional_shifted, cases.RETURN)() diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_math_builtin_execution.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_math_builtin_execution.py index f7121dc82f..a5d2b92719 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_math_builtin_execution.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_math_builtin_execution.py @@ -18,7 +18,7 @@ import numpy as np import pytest -from gt4py.next import np_as_located_field +import gt4py.next as gtx from gt4py.next.ffront import dialect_ast_enums, fbuiltins, field_operator_ast as foast from gt4py.next.ffront.decorator import FieldOperator from gt4py.next.ffront.foast_passes.type_deduction import FieldOperatorTypeDeduction @@ -122,9 +122,9 @@ def test_math_function_builtins_execution(cartesian_case, builtin_name: str, inp else: ref_impl: Callable = getattr(np, builtin_name) - inps = [np_as_located_field(IDim)(np.asarray(input)) for input in inputs] + inps = [gtx.as_field([IDim], np.asarray(input)) for input in inputs] expected = ref_impl(*inputs) - out = np_as_located_field(IDim)(np.zeros_like(expected)) + out = gtx.as_field([IDim], np.zeros_like(expected)) builtin_field_op = make_builtin_field_operator(builtin_name).with_backend( cartesian_case.backend diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_math_unary_builtins.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_math_unary_builtins.py index 034ce56fee..5a277f9440 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_math_unary_builtins.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_math_unary_builtins.py @@ -90,7 +90,7 @@ def test_mod(cartesian_case): def mod_fieldop(inp1: cases.IField) -> cases.IField: return inp1 % 2 - inp1 = gtx.np_as_located_field(IDim)(np.asarray(range(10), dtype=int32) - 5) + inp1 = gtx.as_field([IDim], np.asarray(range(10), dtype=int32) - 5) out = cases.allocate(cartesian_case, mod_fieldop, cases.RETURN)() cases.verify(cartesian_case, mod_fieldop, inp1, out=out, ref=inp1 % 2) diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py index d86bc21679..7a1c827a0d 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py @@ -214,7 +214,7 @@ def prog( def test_wrong_argument_type(cartesian_case, copy_program_def): copy_program = gtx.program(copy_program_def, backend=cartesian_case.backend) - inp = gtx.np_as_located_field(JDim)(np.ones((cartesian_case.default_sizes[JDim],))) + inp = gtx.as_field([JDim], np.ones((cartesian_case.default_sizes[JDim],))) out = cases.allocate(cartesian_case, copy_program, 
"out").strategy(cases.ConstInitializer(1))() with pytest.raises(TypeError) as exc_info: diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_scalar_if.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_scalar_if.py index f9fd2c1353..e9c3ac8d19 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_scalar_if.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_scalar_if.py @@ -18,7 +18,8 @@ import numpy as np import pytest -from gt4py.next import Field, errors, field_operator, float64, index_field, np_as_located_field +import gt4py.next as gtx +from gt4py.next import Field, errors, field_operator, float64, index_field from next_tests.integration_tests import cases from next_tests.integration_tests.cases import ( diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_builtins.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_builtins.py index e2bbbaa553..d5d57c9024 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_builtins.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_builtins.py @@ -20,6 +20,7 @@ import pytest import gt4py.next as gtx +import gt4py.next.program_processors.processor_interface as ppi from gt4py.next.iterator import builtins as it_builtins from gt4py.next.iterator.builtins import ( and_, @@ -58,7 +59,7 @@ from next_tests.unit_tests.conftest import program_processor, run_processor -def asarray(*lists): +def array_maker(*lists): def _listify(val): if isinstance(val, Iterable): return val @@ -72,8 +73,8 @@ def _listify(val): IDim = gtx.Dimension("IDim") -def asfield(*arrays): - res = list(map(gtx.np_as_located_field(IDim), arrays)) +def field_maker(*arrays): + res = list(map(gtx.as_field.partial([IDim]), arrays)) return res @@ -171,8 +172,8 @@ def arithmetic_and_logical_test_data(): def test_arithmetic_and_logical_builtins(program_processor, builtin, inputs, expected, as_column): program_processor, validate = program_processor - inps = asfield(*asarray(*inputs)) - out = asfield((np.zeros_like(*asarray(expected))))[0] + inps = field_maker(*array_maker(*inputs)) + out = field_maker((np.zeros_like(*array_maker(expected))))[0] fencil(builtin, out, *inps, processor=program_processor, as_column=as_column) @@ -184,13 +185,16 @@ def test_arithmetic_and_logical_builtins(program_processor, builtin, inputs, exp def test_arithmetic_and_logical_functors_gtfn(builtin, inputs, expected): if builtin == if_: pytest.skip("If cannot be used unapplied") - inps = asfield(*asarray(*inputs)) - out = asfield((np.zeros_like(*asarray(expected))))[0] + inps = field_maker(*array_maker(*inputs)) + out = field_maker((np.zeros_like(*array_maker(expected))))[0] + gtfn_executor = run_gtfn.executor gtfn_without_transforms = dataclasses.replace( - run_gtfn, - otf_workflow=run_gtfn.otf_workflow.replace( - translation=run_gtfn.otf_workflow.translation.replace(enable_itir_transforms=False), + gtfn_executor, + otf_workflow=gtfn_executor.otf_workflow.replace( + translation=gtfn_executor.otf_workflow.translation.replace( + enable_itir_transforms=False + ), ), ) # avoid inlining the function fencil(builtin, out, *inps, processor=gtfn_without_transforms) @@ -202,6 +206,7 @@ def test_arithmetic_and_logical_functors_gtfn(builtin, inputs, expected): @pytest.mark.parametrize("builtin_name, inputs", math_builtin_test_data()) def test_math_function_builtins(program_processor, builtin_name, inputs, as_column): 
program_processor, validate = program_processor

    if builtin_name == "gamma":
        # numpy has no gamma function
@@ -209,10 +214,10 @@ def test_math_function_builtins(program_processor, builtin_name, inputs, as_colu
    else:
        ref_impl: Callable = getattr(np, builtin_name)

-    inps = asfield(*asarray(*inputs))
+    inps = field_maker(*array_maker(*inputs))
    expected = ref_impl(*inputs)

-    out = asfield((np.zeros_like(*asarray(expected))))[0]
+    out = field_maker((np.zeros_like(*array_maker(expected))))[0]

    fencil(
        getattr(it_builtins, builtin_name),
@@ -251,8 +256,8 @@ def test_can_deref(program_processor, stencil):

    Node = gtx.Dimension("Node")

-    inp = gtx.np_as_located_field(Node)(np.ones((1,), dtype=np.int32))
-    out = gtx.np_as_located_field(Node)(np.asarray([0], dtype=inp.dtype))
+    inp = gtx.as_field([Node], np.ones((1,), dtype=np.int32))
+    out = gtx.as_field([Node], np.asarray([0], dtype=inp.dtype))

    no_neighbor_tbl = gtx.NeighborTableOffsetProvider(np.array([[-1]]), Node, Node, 1)
    run_processor(
@@ -290,8 +295,8 @@ def test_can_deref(program_processor, stencil):
#     shifted = shift(Neighbor, 0)(inp)
#     return if_(can_deref(shifted), 1, -1)

-#     inp = gtx.np_as_located_field(Node)(np.zeros((1,)))
-#     out = gtx.np_as_located_field(Node)(np.asarray([0]))
+#     inp = gtx.as_field([Node], np.zeros((1,)))
+#     out = gtx.as_field([Node], np.asarray([0]))

#     no_neighbor_tbl = gtx.NeighborTableOffsetProvider(np.array([[None]]), Node, Node, 1)
#     _can_deref[{Node: range(1)}](
@@ -324,7 +329,7 @@ def test_cast(program_processor, as_column, input_value, dtype, np_dtype):
    program_processor, validate = program_processor
    column_axis = IDim if as_column else None

-    inp = asfield(np.array([input_value]))[0]
+    inp = field_maker(np.array([input_value]))[0]

    casted_valued = np_dtype(input_value)

@@ -332,7 +337,7 @@ def sten_cast(it, casted_valued):
        return eq(cast_(deref(it), dtype), deref(casted_valued))

-    out = asfield(np.zeros_like(inp, dtype=builtins.bool))[0]
+    out = field_maker(np.zeros_like(inp, dtype=builtins.bool))[0]
    run_processor(
        sten_cast[{IDim: range(1)}],
        program_processor,
diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_cartesian_offset_provider.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_cartesian_offset_provider.py
index 05a7d4d9df..5c80d9e415 100644
--- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_cartesian_offset_provider.py
+++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_cartesian_offset_provider.py
@@ -52,8 +52,8 @@ def fencil_swapped(output, input):

def test_cartesian_offset_provider():
-    inp = gtx.np_as_located_field(I_loc, J_loc)(np.asarray([[0, 42], [1, 43]]))
-    out = gtx.np_as_located_field(I_loc, J_loc)(np.asarray([[-1]]))
+    inp = gtx.as_field([I_loc, J_loc], np.asarray([[0, 42], [1, 43]]))
+    out = gtx.as_field([I_loc, J_loc], np.asarray([[-1]]))

    fencil(out, inp)
    assert out[0][0] == 42
diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_conditional.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_conditional.py
index c2517f1a07..de7ebf2869 100644
--- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_conditional.py
+++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_conditional.py
@@ -37,8 +37,8 @@ def test_conditional_w_tuple(program_processor):

    shape = [5]

-    inp = 
gtx.np_as_located_field(IDim)(np.random.randint(0, 2, shape, dtype=np.int32)) - out = gtx.np_as_located_field(IDim)(np.zeros(shape)) + inp = gtx.as_field([IDim], np.random.randint(0, 2, shape, dtype=np.int32)) + out = gtx.as_field([IDim], np.zeros(shape)) dom = { IDim: range(0, shape[0]), diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_constant.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_constant.py index c2d7ed5e59..83a86319b4 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_constant.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_constant.py @@ -30,8 +30,8 @@ def constant_stencil(): # this is traced as a lambda, TODO directly feed iterat return deref(inp) + deref(lift(constant_stencil)()) - inp = gtx.np_as_located_field(IDim)(np.asarray([0, 42], dtype=np.int32)) - res = gtx.np_as_located_field(IDim)(np.zeros_like(inp)) + inp = gtx.as_field([IDim], np.asarray([0, 42], dtype=np.int32)) + res = gtx.as_field([IDim], np.zeros_like(inp)) add_constant[{IDim: range(2)}](inp, out=res, offset_provider={}, backend=roundtrip.executor) diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_horizontal_indirection.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_horizontal_indirection.py index 75b935677b..f9bd2cc33b 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_horizontal_indirection.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_horizontal_indirection.py @@ -31,9 +31,7 @@ from gt4py.next.iterator.builtins import * from gt4py.next.iterator.runtime import fundef, offset from gt4py.next.program_processors.formatters import type_check -from gt4py.next.program_processors.formatters.gtfn import ( - format_sourcecode as gtfn_format_sourcecode, -) +from gt4py.next.program_processors.formatters.gtfn import format_cpp as gtfn_format_sourcecode from next_tests.integration_tests.cases import IDim from next_tests.unit_tests.conftest import program_processor, run_processor @@ -57,7 +55,7 @@ def test_simple_indirection(program_processor): program_processor, validate = program_processor if program_processor in [ - type_check.check, + type_check.check_type_inference, gtfn_format_sourcecode, ]: pytest.xfail( @@ -65,10 +63,10 @@ def test_simple_indirection(program_processor): ) # TODO fix test or generalize itir? 
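    # NOTE: `as_field` takes the origin directly as a keyword; a sketch with
    # illustrative sizes:
    #     gtx.as_field([IDim], np.arange(10), origin={IDim: 1})
    # exposes the buffer at IDim indices -1..8, equivalent to the deprecated
    #     np_as_located_field(IDim, origin={IDim: 1})(np.arange(10))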
shape = [8] - inp = gtx.np_as_located_field(IDim, origin={IDim: 1})(np.arange(0, shape[0] + 2)) + inp = gtx.as_field([IDim], np.arange(0, shape[0] + 2), origin={IDim: 1}) rng = np.random.default_rng() - cond = gtx.np_as_located_field(IDim)(rng.normal(size=shape)) - out = gtx.np_as_located_field(IDim)(np.zeros(shape, dtype=inp.dtype)) + cond = gtx.as_field([IDim], rng.normal(size=shape)) + out = gtx.as_field([IDim], np.zeros(shape, dtype=inp.dtype)) ref = np.zeros(shape, dtype=inp.dtype) for i in range(shape[0]): @@ -97,9 +95,9 @@ def test_direct_offset_for_indirection(program_processor): program_processor, validate = program_processor shape = [4] - inp = gtx.np_as_located_field(IDim)(np.asarray(range(shape[0]), dtype=np.float64)) - cond = gtx.np_as_located_field(IDim)(np.asarray([2, 1, -1, -2], dtype=np.int32)) - out = gtx.np_as_located_field(IDim)(np.zeros(shape, dtype=np.float64)) + inp = gtx.as_field([IDim], np.asarray(range(shape[0]), dtype=np.float64)) + cond = gtx.as_field([IDim], np.asarray([2, 1, -1, -2], dtype=np.int32)) + out = gtx.as_field([IDim], np.zeros(shape, dtype=np.float64)) ref = np.zeros(shape) for i in range(shape[0]): diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_implicit_fencil.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_implicit_fencil.py index d0dc8ec475..2df7691f9e 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_implicit_fencil.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_implicit_fencil.py @@ -33,11 +33,11 @@ def dom(): def a_field(): - return gtx.np_as_located_field(I)(np.arange(0, _isize, dtype=np.float64)) + return gtx.as_field([I], np.arange(0, _isize, dtype=np.float64)) def out_field(): - return gtx.np_as_located_field(I)(np.zeros(shape=(_isize,))) + return gtx.as_field([I], np.zeros(shape=(_isize,))) @fundef diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_scan.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_scan.py index e02dab0a72..3af0440c27 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_scan.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_scan.py @@ -30,10 +30,11 @@ def test_scan_in_stencil(program_processor, lift_mode): isize = 1 ksize = 3 Koff = offset("Koff") - inp = gtx.np_as_located_field(IDim, KDim)( - np.copy(np.broadcast_to(np.arange(0, ksize, dtype=np.float64), (isize, ksize))) + inp = gtx.as_field( + [IDim, KDim], + np.copy(np.broadcast_to(np.arange(0, ksize, dtype=np.float64), (isize, ksize))), ) - out = gtx.np_as_located_field(IDim, KDim)(np.zeros((isize, ksize))) + out = gtx.as_field([IDim, KDim], np.zeros((isize, ksize))) reference = np.zeros((isize, ksize - 1)) reference[:, 0] = inp.ndarray[:, 0] + inp.ndarray[:, 1] diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_strided_offset_provider.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_strided_offset_provider.py index 0ac38e9b9f..abdfffd74e 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_strided_offset_provider.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_strided_offset_provider.py @@ -56,12 +56,13 @@ def test_strided_offset_provider(program_processor): LocAB_size = LocA_size * max_neighbors rng = np.random.default_rng() - inp = gtx.np_as_located_field(LocAB)( + inp = gtx.as_field( + [LocAB], rng.normal( 
size=(LocAB_size,), - ) + ), ) - out = gtx.np_as_located_field(LocA)(np.zeros((LocA_size,))) + out = gtx.as_field([LocA], np.zeros((LocA_size,))) ref = np.sum(np.asarray(inp).reshape(LocA_size, max_neighbors), axis=-1) run_processor(fencil, program_processor, LocA_size, out, inp) diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_trivial.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_trivial.py index cc12183a24..8c59f994ee 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_trivial.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_trivial.py @@ -52,8 +52,8 @@ def test_trivial(program_processor, lift_mode): out = np.copy(inp) shape = (out.shape[0], out.shape[1]) - inp_s = gtx.np_as_located_field(IDim, JDim, origin={IDim: 0, JDim: 0})(inp[:, :, 0]) - out_s = gtx.np_as_located_field(IDim, JDim)(np.zeros_like(inp[:, :, 0])) + inp_s = gtx.as_field([IDim, JDim], inp[:, :, 0], origin={IDim: 0, JDim: 0}) + out_s = gtx.as_field([IDim, JDim], np.zeros_like(inp[:, :, 0])) run_processor( baz[cartesian_domain(named_range(IDim, 0, shape[0]), named_range(JDim, 0, shape[1]))], @@ -85,8 +85,8 @@ def test_shifted_arg_to_lift(program_processor, lift_mode): out[1:, :] = inp[:-1, :] shape = (out.shape[0], out.shape[1]) - inp_s = gtx.np_as_located_field(IDim, JDim, origin={IDim: 0, JDim: 0})(inp[:, :]) - out_s = gtx.np_as_located_field(IDim, JDim)(np.zeros_like(inp[:, :])) + inp_s = gtx.as_field([IDim, JDim], inp[:, :], origin={IDim: 0, JDim: 0}) + out_s = gtx.as_field([IDim, JDim], np.zeros_like(inp[:, :])) run_processor( stencil_shifted_arg_to_lift[ @@ -123,8 +123,8 @@ def test_direct_deref(program_processor, lift_mode): inp = rng.uniform(size=(5, 7)) out = np.copy(inp) - inp_s = gtx.np_as_located_field(IDim, JDim)(inp) - out_s = gtx.np_as_located_field(IDim, JDim)(np.zeros_like(inp)) + inp_s = gtx.as_field([IDim, JDim], inp) + out_s = gtx.as_field([IDim, JDim], np.zeros_like(inp)) run_processor( fen_direct_deref, @@ -153,8 +153,8 @@ def test_vertical_shift_unstructured(program_processor): rng = np.random.default_rng() inp = rng.uniform(size=(1, k_size)) - inp_s = gtx.np_as_located_field(IDim, KDim)(inp) - out_s = gtx.np_as_located_field(IDim, KDim)(np.zeros_like(inp)) + inp_s = gtx.as_field([IDim, KDim], inp) + out_s = gtx.as_field([IDim, KDim], np.zeros_like(inp)) run_processor( vertical_shift[ diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_tuple.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_tuple.py index 67b439507c..97a51508f5 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_tuple.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_tuple.py @@ -55,16 +55,18 @@ def test_tuple_output(program_processor, stencil): shape = [5, 7, 9] rng = np.random.default_rng() - inp1 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp1 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp2 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp2 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) out = ( - gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)), - gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)), + gtx.as_field([IDim, JDim, KDim], np.zeros(shape)), + gtx.as_field([IDim, JDim, KDim], np.zeros(shape)), ) dom = { @@ -98,27 +100,31 @@ def stencil(inp1, inp2, inp3, inp4): shape = [5, 7, 9] rng = 
np.random.default_rng() - inp1 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp1 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp2 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp2 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp3 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp3 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp4 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp4 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) out = ( ( - gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)), - gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)), + gtx.as_field([IDim, JDim, KDim], np.zeros(shape)), + gtx.as_field([IDim, JDim, KDim], np.zeros(shape)), ), ( - gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)), - gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)), + gtx.as_field([IDim, JDim, KDim], np.zeros(shape)), + gtx.as_field([IDim, JDim, KDim], np.zeros(shape)), ), ) @@ -166,15 +172,17 @@ def fencil(size0, size1, size2, inp1, inp2, out1, out2): shape = [5, 7, 9] rng = np.random.default_rng() - inp1 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp1 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp2 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp2 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - out1 = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) - out2 = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) + out1 = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) + out2 = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) run_processor( fencil, @@ -215,19 +223,22 @@ def fencil(size0, size1, size2, inp1, inp2, inp3, out1, out2, out3): shape = [5, 7, 9] rng = np.random.default_rng() - inp1 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp1 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp2 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp2 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp3 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp3 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - out1 = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) - out2 = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) - out3 = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) + out1 = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) + out2 = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) + out3 = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) run_processor( fencil, @@ -259,15 +270,17 @@ def test_field_of_extra_dim_output(program_processor, stencil): shape = [5, 7, 9] rng = np.random.default_rng() - inp1 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp1 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp2 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp2 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) out_np = np.zeros(shape + [2]) - out = gtx.np_as_located_field(IDim, JDim, KDim, None)(out_np) + out = gtx.as_field([IDim, JDim, KDim, None], out_np) dom = { IDim: range(0, shape[0]), @@ -292,14 +305,16 @@ def test_tuple_field_input(program_processor): shape = [5, 7, 9] rng = np.random.default_rng() - inp1 = gtx.np_as_located_field(IDim, 
JDim, KDim)( + inp1 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - inp2 = gtx.np_as_located_field(IDim, JDim, KDim)( + inp2 = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2])), ) - out = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) + out = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) dom = { IDim: range(0, shape[0]), @@ -322,8 +337,8 @@ def test_field_of_extra_dim_input(program_processor): inp2 = rng.normal(size=(shape[0], shape[1], shape[2])) inp = np.stack((inp1, inp2), axis=-1) - inp = gtx.np_as_located_field(IDim, JDim, KDim, None)(inp) - out = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) + inp = gtx.as_field([IDim, JDim, KDim, None], inp) + out = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) dom = { IDim: range(0, shape[0]), @@ -353,20 +368,12 @@ def test_tuple_of_tuple_of_field_input(program_processor): shape = [5, 7, 9] rng = np.random.default_rng() - inp1 = gtx.np_as_located_field(IDim, JDim, KDim)( - rng.normal(size=(shape[0], shape[1], shape[2])) - ) - inp2 = gtx.np_as_located_field(IDim, JDim, KDim)( - rng.normal(size=(shape[0], shape[1], shape[2])) - ) - inp3 = gtx.np_as_located_field(IDim, JDim, KDim)( - rng.normal(size=(shape[0], shape[1], shape[2])) - ) - inp4 = gtx.np_as_located_field(IDim, JDim, KDim)( - rng.normal(size=(shape[0], shape[1], shape[2])) - ) + inp1 = gtx.as_field([IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2]))) + inp2 = gtx.as_field([IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2]))) + inp3 = gtx.as_field([IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2]))) + inp4 = gtx.as_field([IDim, JDim, KDim], rng.normal(size=(shape[0], shape[1], shape[2]))) - out = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) + out = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) dom = { IDim: range(0, shape[0]), @@ -393,11 +400,11 @@ def test_field_of_2_extra_dim_input(program_processor): shape = [5, 7, 9] rng = np.random.default_rng() - inp = gtx.np_as_located_field(IDim, JDim, KDim, None, None)( - rng.normal(size=(shape[0], shape[1], shape[2], 2, 2)) + inp = gtx.as_field( + [IDim, JDim, KDim, None, None], rng.normal(size=(shape[0], shape[1], shape[2], 2, 2)) ) - out = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) + out = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) dom = { IDim: range(0, shape[0]), diff --git a/tests/next_tests/integration_tests/feature_tests/test_util_cases.py b/tests/next_tests/integration_tests/feature_tests/test_util_cases.py index 3eaefa76de..3f229ef389 100644 --- a/tests/next_tests/integration_tests/feature_tests/test_util_cases.py +++ b/tests/next_tests/integration_tests/feature_tests/test_util_cases.py @@ -17,8 +17,8 @@ import gt4py.next as gtx from gt4py.next import errors -from gt4py.next.program_processors.runners import roundtrip +import next_tests.exclusion_matrices as definitions from next_tests.integration_tests import cases from next_tests.integration_tests.cases import ( # noqa: F401 # fixtures cartesian_case, @@ -70,7 +70,7 @@ def test_allocate_const(cartesian_case): # noqa: F811 # fixtures assert b == 42.0 -@pytest.mark.parametrize("fieldview_backend", [roundtrip.executor]) +@pytest.mark.parametrize("fieldview_backend", [~definitions.ProgramBackendId.ROUNDTRIP]) def test_verify_fails_with_wrong_reference(cartesian_case): # noqa: F811 # fixtures a = cases.allocate(cartesian_case, addition, "a")() b = cases.allocate(cartesian_case, addition, "b")() @@ 
-81,7 +81,7 @@ def test_verify_fails_with_wrong_reference(cartesian_case): # noqa: F811 # fixt cases.verify(cartesian_case, addition, a, b, out=out, ref=wrong_ref) -@pytest.mark.parametrize("fieldview_backend", [roundtrip.executor]) +@pytest.mark.parametrize("fieldview_backend", [~definitions.ProgramBackendId.ROUNDTRIP]) def test_verify_fails_with_wrong_type(cartesian_case): # noqa: F811 # fixtures a = cases.allocate(cartesian_case, addition, "a").dtype(np.float32)() b = cases.allocate(cartesian_case, addition, "b")() @@ -91,7 +91,7 @@ def test_verify_fails_with_wrong_type(cartesian_case): # noqa: F811 # fixtures cases.verify(cartesian_case, addition, a, b, out=out, ref=a + b) -@pytest.mark.parametrize("fieldview_backend", [roundtrip.executor]) +@pytest.mark.parametrize("fieldview_backend", [~definitions.ProgramBackendId.ROUNDTRIP]) def test_verify_with_default_data_fails_with_wrong_reference( cartesian_case, # noqa: F811 # fixtures ): diff --git a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_icon_like_scan.py b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_icon_like_scan.py index 64fb238470..108ee25862 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_icon_like_scan.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_icon_like_scan.py @@ -192,21 +192,17 @@ def test_setup(): class setup: cell_size = 14 k_size = 10 - z_alpha = gtx.np_as_located_field(Cell, KDim)( - np.random.default_rng().uniform(size=(cell_size, k_size + 1)) + z_alpha = gtx.as_field( + [Cell, KDim], np.random.default_rng().uniform(size=(cell_size, k_size + 1)) ) - z_beta = gtx.np_as_located_field(Cell, KDim)( - np.random.default_rng().uniform(size=(cell_size, k_size)) - ) - z_q = gtx.np_as_located_field(Cell, KDim)( - np.random.default_rng().uniform(size=(cell_size, k_size)) - ) - w = gtx.np_as_located_field(Cell, KDim)( - np.random.default_rng().uniform(size=(cell_size, k_size)) + z_beta = gtx.as_field( + [Cell, KDim], np.random.default_rng().uniform(size=(cell_size, k_size)) ) + z_q = gtx.as_field([Cell, KDim], np.random.default_rng().uniform(size=(cell_size, k_size))) + w = gtx.as_field([Cell, KDim], np.random.default_rng().uniform(size=(cell_size, k_size))) z_q_ref, w_ref = reference(z_alpha.ndarray, z_beta.ndarray, z_q.ndarray, w.ndarray) - dummy = gtx.np_as_located_field(Cell, KDim)(np.zeros((cell_size, k_size), dtype=bool)) - z_q_out = gtx.np_as_located_field(Cell, KDim)(np.zeros((cell_size, k_size))) + dummy = gtx.as_field([Cell, KDim], np.zeros((cell_size, k_size), dtype=bool)) + z_q_out = gtx.as_field([Cell, KDim], np.zeros((cell_size, k_size))) return setup() @@ -239,7 +235,7 @@ def test_solve_nonhydro_stencil_52_like_z_q_tup(test_setup, fieldview_backend): "Needs implementation of scan projector. Breaks in type inference as executed" "again after CollapseTuple." 
) - if fieldview_backend == roundtrip.executor: + if fieldview_backend == roundtrip.backend: pytest.xfail("Needs proper handling of tuple[Column] <-> Column[tuple].") solve_nonhydro_stencil_52_like_z_q_tup.with_backend(fieldview_backend)( @@ -275,7 +271,7 @@ def test_solve_nonhydro_stencil_52_like(test_setup, fieldview_backend): def test_solve_nonhydro_stencil_52_like_with_gtfn_tuple_merge(test_setup, fieldview_backend): if fieldview_backend in [gtfn.run_gtfn_with_temporaries]: pytest.xfail("Temporary extraction does not work correctly in combination with scans.") - if fieldview_backend == roundtrip.executor: + if fieldview_backend == roundtrip.backend: pytest.xfail("Needs proper handling of tuple[Column] <-> Column[tuple].") solve_nonhydro_stencil_52_like_with_gtfn_tuple_merge.with_backend(fieldview_backend)( diff --git a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_anton_toy.py b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_anton_toy.py index 4e295e92af..829bc497cb 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_anton_toy.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_anton_toy.py @@ -90,10 +90,12 @@ def test_anton_toy(program_processor, lift_mode): shape = [5, 7, 9] rng = np.random.default_rng() - inp = gtx.np_as_located_field(IDim, JDim, KDim, origin={IDim: 1, JDim: 1, KDim: 0})( + inp = gtx.as_field( + [IDim, JDim, KDim], rng.normal(size=(shape[0] + 2, shape[1] + 2, shape[2])), + origin={IDim: 1, JDim: 1, KDim: 0}, ) - out = gtx.np_as_located_field(IDim, JDim, KDim)(np.zeros(shape)) + out = gtx.as_field([IDim, JDim, KDim], np.zeros(shape)) ref = naive_lap(inp) run_processor( diff --git a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_column_stencil.py b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_column_stencil.py index 04cf8c6f9c..d05b14d73d 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_column_stencil.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_column_stencil.py @@ -56,15 +56,17 @@ def shift_stencil(inp): ( shift_stencil, lambda inp: np.asarray(inp)[1:, 1:], - lambda shape: gtx.np_as_located_field(IDim, KDim)( - np.fromfunction(lambda i, k: i * 10 + k, [shape[0] + 1, shape[1] + 1]) + lambda shape: gtx.as_field( + [IDim, KDim], np.fromfunction(lambda i, k: i * 10 + k, [shape[0] + 1, shape[1] + 1]) ), ), ( shift_stencil, lambda inp: np.asarray(inp)[1:, 2:], - lambda shape: gtx.np_as_located_field(IDim, KDim, origin={IDim: 0, KDim: 1})( - np.fromfunction(lambda i, k: i * 10 + k, [shape[0] + 1, shape[1] + 2]) + lambda shape: gtx.as_field( + [IDim, KDim], + np.fromfunction(lambda i, k: i * 10 + k, [shape[0] + 1, shape[1] + 2]), + origin={IDim: 0, KDim: 1}, ), ), ], @@ -81,11 +83,11 @@ def test_basic_column_stencils(program_processor, lift_mode, basic_stencils): shape = [5, 7] inp = ( - gtx.np_as_located_field(IDim, KDim)(np.fromfunction(lambda i, k: i * 10 + k, shape)) + gtx.as_field([IDim, KDim], np.fromfunction(lambda i, k: i * 10 + k, shape)) if inp_fun is None else inp_fun(shape) ) - out = gtx.np_as_located_field(IDim, KDim)(np.zeros(shape)) + out = gtx.as_field([IDim, KDim], np.zeros(shape)) ref = ref_fun(inp) @@ -129,21 +131,21 @@ def k_level_condition_upper_tuple(k_idx, k_level): ( k_level_condition_lower, lambda inp: 0, - lambda k_size: gtx.np_as_located_field(KDim)(np.arange(k_size, dtype=np.int32)), + 
lambda k_size: gtx.as_field([KDim], np.arange(k_size, dtype=np.int32)), lambda inp: np.concatenate([[0], inp[:-1]]), ), ( k_level_condition_upper, lambda inp: inp.shape[0] - 1, - lambda k_size: gtx.np_as_located_field(KDim)(np.arange(k_size, dtype=np.int32)), + lambda k_size: gtx.as_field([KDim], np.arange(k_size, dtype=np.int32)), lambda inp: np.concatenate([inp[1:], [0]]), ), ( k_level_condition_upper_tuple, lambda inp: inp[0].shape[0] - 1, lambda k_size: ( - gtx.np_as_located_field(KDim)(np.arange(k_size, dtype=np.int32)), - gtx.np_as_located_field(KDim)(np.arange(k_size, dtype=np.int32)), + gtx.as_field([KDim], np.arange(k_size, dtype=np.int32)), + gtx.as_field([KDim], np.arange(k_size, dtype=np.int32)), ), lambda inp: np.concatenate([(inp[0][1:] + inp[1][1:]), [0]]), ), @@ -157,7 +159,7 @@ def test_k_level_condition(program_processor, lift_mode, fun, k_level, inp_funct inp = inp_function(k_size) ref = ref_function(inp) - out = gtx.np_as_located_field(KDim)(np.zeros((5,), dtype=np.int32)) + out = gtx.as_field([KDim], np.zeros((5,), dtype=np.int32)) run_processor( fun[{KDim: range(0, k_size)}], @@ -204,8 +206,8 @@ def ksum_fencil(i_size, k_start, k_end, inp, out): def test_ksum_scan(program_processor, lift_mode, kstart, reference): program_processor, validate = program_processor shape = [1, 7] - inp = gtx.np_as_located_field(IDim, KDim)(np.array(np.broadcast_to(np.arange(0.0, 7.0), shape))) - out = gtx.np_as_located_field(IDim, KDim)(np.zeros(shape, dtype=inp.dtype)) + inp = gtx.as_field([IDim, KDim], np.array(np.broadcast_to(np.arange(0.0, 7.0), shape))) + out = gtx.as_field([IDim, KDim], np.zeros(shape, dtype=inp.dtype)) run_processor( ksum_fencil, @@ -241,8 +243,8 @@ def ksum_back_fencil(i_size, k_size, inp, out): def test_ksum_back_scan(program_processor, lift_mode): program_processor, validate = program_processor shape = [1, 7] - inp = gtx.np_as_located_field(IDim, KDim)(np.array(np.broadcast_to(np.arange(0.0, 7.0), shape))) - out = gtx.np_as_located_field(IDim, KDim)(np.zeros(shape, dtype=inp.dtype)) + inp = gtx.as_field([IDim, KDim], np.array(np.broadcast_to(np.arange(0.0, 7.0), shape))) + out = gtx.as_field([IDim, KDim], np.zeros(shape, dtype=inp.dtype)) ref = np.asarray([[21, 21, 20, 18, 15, 11, 6]]) @@ -304,11 +306,11 @@ def test_kdoublesum_scan(program_processor, lift_mode, kstart, reference): program_processor, validate = program_processor pytest.xfail("structured dtype input/output currently unsupported") shape = [1, 7] - inp0 = gtx.np_as_located_field(IDim, KDim)(np.asarray([list(range(7))], dtype=np.float64)) - inp1 = gtx.np_as_located_field(IDim, KDim)(np.asarray([list(range(7))], dtype=np.int32)) + inp0 = gtx.as_field([IDim, KDim], np.asarray([list(range(7))], dtype=np.float64)) + inp1 = gtx.as_field([IDim, KDim], np.asarray([list(range(7))], dtype=np.int32)) out = ( - gtx.np_as_located_field(IDim, KDim)(np.zeros(shape, dtype=np.float64)), - gtx.np_as_located_field(IDim, KDim)(np.zeros(shape, dtype=np.float32)), + gtx.as_field([IDim, KDim], np.zeros(shape, dtype=np.float64)), + gtx.as_field([IDim, KDim], np.zeros(shape, dtype=np.float32)), ) run_processor( @@ -348,9 +350,9 @@ def test_different_vertical_sizes(program_processor): program_processor, validate = program_processor k_size = 10 - inp0 = gtx.np_as_located_field(KDim)(np.arange(0, k_size)) - inp1 = gtx.np_as_located_field(KDim)(np.arange(0, k_size + 1)) - out = gtx.np_as_located_field(KDim)(np.zeros(k_size, dtype=inp0.dtype)) + inp0 = gtx.as_field([KDim], np.arange(0, k_size)) + inp1 = gtx.as_field([KDim], 
np.arange(0, k_size + 1)) + out = gtx.as_field([KDim], np.zeros(k_size, dtype=inp0.dtype)) ref = inp0.ndarray + inp1.ndarray[1:] run_processor( @@ -387,9 +389,9 @@ def test_different_vertical_sizes_with_origin(program_processor): program_processor, validate = program_processor k_size = 10 - inp0 = gtx.np_as_located_field(KDim)(np.arange(0, k_size)) - inp1 = gtx.np_as_located_field(KDim, origin={KDim: 1})(np.arange(0, k_size + 1)) - out = gtx.np_as_located_field(KDim)(np.zeros(k_size, dtype=np.int64)) + inp0 = gtx.as_field([KDim], np.arange(0, k_size)) + inp1 = gtx.as_field([KDim], np.arange(0, k_size + 1), origin={KDim: 1}) + out = gtx.as_field([KDim], np.zeros(k_size, dtype=np.int64)) ref = np.asarray(inp0) + np.asarray(inp1)[:-1] run_processor( diff --git a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_fvm_nabla.py b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_fvm_nabla.py index 445b73548b..47867b9a64 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_fvm_nabla.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_fvm_nabla.py @@ -138,10 +138,10 @@ def test_compute_zavgS(program_processor, lift_mode): program_processor, validate = program_processor setup = nabla_setup() - pp = gtx.np_as_located_field(Vertex)(setup.input_field) - S_MXX, S_MYY = tuple(map(gtx.np_as_located_field(Edge), setup.S_fields)) + pp = gtx.as_field([Vertex], setup.input_field) + S_MXX, S_MYY = tuple(map(gtx.as_field.partial([Edge]), setup.S_fields)) - zavgS = gtx.np_as_located_field(Edge)(np.zeros((setup.edges_size))) + zavgS = gtx.as_field([Edge], np.zeros((setup.edges_size))) e2v = gtx.NeighborTableOffsetProvider( AtlasTable(setup.edges2node_connectivity), Edge, Vertex, 2 @@ -197,13 +197,13 @@ def test_compute_zavgS2(program_processor, lift_mode): program_processor, validate = program_processor setup = nabla_setup() - pp = gtx.np_as_located_field(Vertex)(setup.input_field) + pp = gtx.as_field([Vertex], setup.input_field) - S = tuple(gtx.np_as_located_field(Edge)(s) for s in setup.S_fields) + S = tuple(gtx.as_field([Edge], s) for s in setup.S_fields) zavgS = ( - gtx.np_as_located_field(Edge)(np.zeros((setup.edges_size))), - gtx.np_as_located_field(Edge)(np.zeros((setup.edges_size))), + gtx.as_field([Edge], np.zeros((setup.edges_size))), + gtx.as_field([Edge], np.zeros((setup.edges_size))), ) e2v = gtx.NeighborTableOffsetProvider( @@ -236,13 +236,13 @@ def test_nabla(program_processor, lift_mode): pytest.xfail("shifted input arguments not supported for lift_mode != LiftMode.FORCE_INLINE") setup = nabla_setup() - sign = gtx.np_as_located_field(Vertex, V2EDim)(setup.sign_field) - pp = gtx.np_as_located_field(Vertex)(setup.input_field) - S_MXX, S_MYY = tuple(map(gtx.np_as_located_field(Edge), setup.S_fields)) - vol = gtx.np_as_located_field(Vertex)(setup.vol_field) + sign = gtx.as_field([Vertex, V2EDim], setup.sign_field) + pp = gtx.as_field([Vertex], setup.input_field) + S_MXX, S_MYY = tuple(map(gtx.as_field.partial([Edge]), setup.S_fields)) + vol = gtx.as_field([Vertex], setup.vol_field) - pnabla_MXX = gtx.np_as_located_field(Vertex)(np.zeros((setup.nodes_size))) - pnabla_MYY = gtx.np_as_located_field(Vertex)(np.zeros((setup.nodes_size))) + pnabla_MXX = gtx.as_field([Vertex], np.zeros((setup.nodes_size))) + pnabla_MYY = gtx.as_field([Vertex], np.zeros((setup.nodes_size))) e2v = gtx.NeighborTableOffsetProvider( AtlasTable(setup.edges2node_connectivity), Edge, Vertex, 2 @@ -294,13 +294,13 @@ 
def test_nabla2(program_processor, lift_mode): program_processor, validate = program_processor setup = nabla_setup() - sign = gtx.np_as_located_field(Vertex, V2EDim)(setup.sign_field) - pp = gtx.np_as_located_field(Vertex)(setup.input_field) - S_M = tuple(gtx.np_as_located_field(Edge)(s) for s in setup.S_fields) - vol = gtx.np_as_located_field(Vertex)(setup.vol_field) + sign = gtx.as_field([Vertex, V2EDim], setup.sign_field) + pp = gtx.as_field([Vertex], setup.input_field) + S_M = tuple(gtx.as_field([Edge], s) for s in setup.S_fields) + vol = gtx.as_field([Vertex], setup.vol_field) - pnabla_MXX = gtx.np_as_located_field(Vertex)(np.zeros((setup.nodes_size))) - pnabla_MYY = gtx.np_as_located_field(Vertex)(np.zeros((setup.nodes_size))) + pnabla_MXX = gtx.as_field([Vertex], np.zeros((setup.nodes_size))) + pnabla_MYY = gtx.as_field([Vertex], np.zeros((setup.nodes_size))) e2v = gtx.NeighborTableOffsetProvider( AtlasTable(setup.edges2node_connectivity), Edge, Vertex, 2 @@ -375,13 +375,13 @@ def test_nabla_sign(program_processor, lift_mode): pytest.xfail("test is broken due to bad lift semantics in iterator IR") setup = nabla_setup() - is_pole_edge = gtx.np_as_located_field(Edge)(setup.is_pole_edge_field) - pp = gtx.np_as_located_field(Vertex)(setup.input_field) - S_MXX, S_MYY = tuple(map(gtx.np_as_located_field(Edge), setup.S_fields)) - vol = gtx.np_as_located_field(Vertex)(setup.vol_field) + is_pole_edge = gtx.as_field([Edge], setup.is_pole_edge_field) + pp = gtx.as_field([Vertex], setup.input_field) + S_MXX, S_MYY = tuple(map(gtx.as_field.partial([Edge]), setup.S_fields)) + vol = gtx.as_field([Vertex], setup.vol_field) - pnabla_MXX = gtx.np_as_located_field(Vertex)(np.zeros((setup.nodes_size))) - pnabla_MYY = gtx.np_as_located_field(Vertex)(np.zeros((setup.nodes_size))) + pnabla_MXX = gtx.as_field([Vertex], np.zeros((setup.nodes_size))) + pnabla_MYY = gtx.as_field([Vertex], np.zeros((setup.nodes_size))) e2v = gtx.NeighborTableOffsetProvider( AtlasTable(setup.edges2node_connectivity), Edge, Vertex, 2 diff --git a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_hdiff.py b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_hdiff.py index af70dd590f..8aabd18267 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_hdiff.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_hdiff.py @@ -88,9 +88,9 @@ def test_hdiff(hdiff_reference, program_processor, lift_mode): inp, coeff, out = hdiff_reference shape = (out.shape[0], out.shape[1]) - inp_s = gtx.np_as_located_field(IDim, JDim, origin={IDim: 2, JDim: 2})(inp[:, :, 0]) - coeff_s = gtx.np_as_located_field(IDim, JDim)(coeff[:, :, 0]) - out_s = gtx.np_as_located_field(IDim, JDim)(np.zeros_like(coeff[:, :, 0])) + inp_s = gtx.as_field([IDim, JDim], inp[:, :, 0], origin={IDim: 2, JDim: 2}) + coeff_s = gtx.as_field([IDim, JDim], coeff[:, :, 0]) + out_s = gtx.as_field([IDim, JDim], np.zeros_like(coeff[:, :, 0])) run_processor( hdiff, program_processor, inp_s, coeff_s, out_s, shape[0], shape[1], lift_mode=lift_mode diff --git a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_vertical_advection.py b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_vertical_advection.py index a0471e8baa..29c82442ea 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_vertical_advection.py +++ 
b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_vertical_advection.py @@ -122,7 +122,7 @@ def test_tridiag(fencil, tridiag_reference, program_processor, lift_mode): gtfn.run_gtfn, gtfn.run_gtfn_imperative, gtfn.run_gtfn_with_temporaries, - gtfn_formatters.format_sourcecode, + gtfn_formatters.format_cpp, ] and lift_mode == LiftMode.FORCE_INLINE ): @@ -134,7 +134,7 @@ def test_tridiag(fencil, tridiag_reference, program_processor, lift_mode): pytest.xfail("tuple_get on columns not supported.") a, b, c, d, x = tridiag_reference shape = a.shape - as_3d_field = gtx.np_as_located_field(IDim, JDim, KDim) + as_3d_field = gtx.as_field.partial([IDim, JDim, KDim]) a_s = as_3d_field(a) b_s = as_3d_field(b) c_s = as_3d_field(c) diff --git a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_with_toy_connectivity.py b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_with_toy_connectivity.py index d475fab3a8..6354e45451 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_with_toy_connectivity.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/iterator_tests/test_with_toy_connectivity.py @@ -52,11 +52,11 @@ def edge_index_field(): # TODO replace by gtx.index_field once supported in bindings - return gtx.np_as_located_field(Edge)(np.arange(e2v_arr.shape[0], dtype=np.int32)) + return gtx.as_field([Edge], np.arange(e2v_arr.shape[0], dtype=np.int32)) def vertex_index_field(): # TODO replace by gtx.index_field once supported in bindings - return gtx.np_as_located_field(Vertex)(np.arange(v2e_arr.shape[0], dtype=np.int32)) + return gtx.as_field([Vertex], np.arange(v2e_arr.shape[0], dtype=np.int32)) @fundef @@ -87,7 +87,7 @@ def sum_edges_to_vertices_reduce(in_edges): def test_sum_edges_to_vertices(program_processor, lift_mode, stencil): program_processor, validate = program_processor inp = edge_index_field() - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = np.asarray(list(sum(row) for row in v2e_arr)) run_processor( @@ -110,7 +110,7 @@ def map_neighbors(in_edges): def test_map_neighbors(program_processor, lift_mode): program_processor, validate = program_processor inp = edge_index_field() - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = 2 * np.sum(v2e_arr, axis=1) run_processor( @@ -134,7 +134,7 @@ def map_make_const_list(in_edges): def test_map_make_const_list(program_processor, lift_mode): program_processor, validate = program_processor inp = edge_index_field() - out = gtx.np_as_located_field(Vertex)(np.zeros([9], inp.dtype)) + out = gtx.as_field([Vertex], np.zeros([9], inp.dtype)) ref = 2 * np.sum(v2e_arr, axis=1) run_processor( @@ -157,7 +157,7 @@ def first_vertex_neigh_of_first_edge_neigh_of_cells(in_vertices): def test_first_vertex_neigh_of_first_edge_neigh_of_cells_fencil(program_processor, lift_mode): program_processor, validate = program_processor inp = vertex_index_field() - out = gtx.np_as_located_field(Cell)(np.zeros([9], dtype=inp.dtype)) + out = gtx.as_field([Cell], np.zeros([9], dtype=inp.dtype)) ref = np.asarray(list(v2e_arr[c[0]][0] for c in c2e_arr)) run_processor( @@ -183,9 +183,9 @@ def sparse_stencil(non_sparse, inp): def test_sparse_input_field(program_processor, lift_mode): program_processor, validate = program_processor - non_sparse = gtx.np_as_located_field(Edge)(np.zeros(18, 
dtype=np.int32)) - inp = gtx.np_as_located_field(Vertex, V2EDim)(np.asarray([[1, 2, 3, 4]] * 9, dtype=np.int32)) - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + non_sparse = gtx.as_field([Edge], np.zeros(18, dtype=np.int32)) + inp = gtx.as_field([Vertex, V2EDim], np.asarray([[1, 2, 3, 4]] * 9, dtype=np.int32)) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = np.ones([9]) * 10 @@ -206,9 +206,9 @@ def test_sparse_input_field(program_processor, lift_mode): def test_sparse_input_field_v2v(program_processor, lift_mode): program_processor, validate = program_processor - non_sparse = gtx.np_as_located_field(Edge)(np.zeros(18, dtype=np.int32)) - inp = gtx.np_as_located_field(Vertex, V2VDim)(v2v_arr) - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + non_sparse = gtx.as_field([Edge], np.zeros(18, dtype=np.int32)) + inp = gtx.as_field([Vertex, V2VDim], v2v_arr) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = np.asarray(list(sum(row) for row in v2v_arr)) @@ -237,8 +237,8 @@ def slice_sparse_stencil(sparse): @pytest.mark.uses_sparse_fields def test_slice_sparse(program_processor, lift_mode): program_processor, validate = program_processor - inp = gtx.np_as_located_field(Vertex, V2VDim)(v2v_arr) - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + inp = gtx.as_field([Vertex, V2VDim], v2v_arr) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = v2v_arr[:, 1] @@ -265,8 +265,8 @@ def slice_twice_sparse_stencil(sparse): @pytest.mark.xfail(reason="Field with more than one sparse dimension is not implemented.") def test_slice_twice_sparse(program_processor, lift_mode): program_processor, validate = program_processor - inp = gtx.np_as_located_field(Vertex, V2VDim, V2VDim)(v2v_arr[v2v_arr]) - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + inp = gtx.as_field([Vertex, V2VDim, V2VDim], v2v_arr[v2v_arr]) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = v2v_arr[v2v_arr][:, 2, 1] run_processor( @@ -292,8 +292,8 @@ def shift_sliced_sparse_stencil(sparse): @pytest.mark.uses_sparse_fields def test_shift_sliced_sparse(program_processor, lift_mode): program_processor, validate = program_processor - inp = gtx.np_as_located_field(Vertex, V2VDim)(v2v_arr) - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + inp = gtx.as_field([Vertex, V2VDim], v2v_arr) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = v2v_arr[:, 1][v2v_arr][:, 0] @@ -320,8 +320,8 @@ def slice_shifted_sparse_stencil(sparse): @pytest.mark.uses_sparse_fields def test_slice_shifted_sparse(program_processor, lift_mode): program_processor, validate = program_processor - inp = gtx.np_as_located_field(Vertex, V2VDim)(v2v_arr) - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + inp = gtx.as_field([Vertex, V2VDim], v2v_arr) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = v2v_arr[:, 1][v2v_arr][:, 0] @@ -353,7 +353,7 @@ def lift_stencil(inp): def test_lift(program_processor, lift_mode): program_processor, validate = program_processor inp = vertex_index_field() - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = np.asarray(np.asarray(range(9))) run_processor( @@ -376,8 +376,8 @@ def sparse_shifted_stencil(inp): @pytest.mark.uses_sparse_fields def test_shift_sparse_input_field(program_processor, lift_mode): program_processor, 
validate = program_processor - inp = gtx.np_as_located_field(Vertex, V2VDim)(v2v_arr) - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + inp = gtx.as_field([Vertex, V2VDim], v2v_arr) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = np.asarray(np.asarray(range(9))) run_processor( @@ -415,9 +415,9 @@ def test_shift_sparse_input_field2(program_processor, lift_mode): "Bug in bindings/compilation/caching: only the first program seems to be compiled." ) # observed in `cache.Strategy.PERSISTENT` mode inp = vertex_index_field() - inp_sparse = gtx.np_as_located_field(Edge, E2VDim)(e2v_arr) - out1 = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) - out2 = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + inp_sparse = gtx.as_field([Edge, E2VDim], e2v_arr) + out1 = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) + out2 = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) offset_provider = { "E2V": gtx.NeighborTableOffsetProvider(e2v_arr, Edge, Vertex, 2), @@ -461,8 +461,8 @@ def test_sparse_shifted_stencil_reduce(program_processor, lift_mode): if lift_mode != transforms.LiftMode.FORCE_INLINE: pytest.xfail("shifted input arguments not supported for lift_mode != LiftMode.FORCE_INLINE") - inp = gtx.np_as_located_field(Vertex, V2VDim)(v2v_arr) - out = gtx.np_as_located_field(Vertex)(np.zeros([9], dtype=inp.dtype)) + inp = gtx.as_field([Vertex, V2VDim], v2v_arr) + out = gtx.as_field([Vertex], np.zeros([9], dtype=inp.dtype)) ref = [] for row in v2v_arr: diff --git a/tests/next_tests/integration_tests/multi_feature_tests/otf_tests/test_gtfn_workflow.py b/tests/next_tests/integration_tests/multi_feature_tests/otf_tests/test_gtfn_workflow.py index c60079eaf1..d851c5560a 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/otf_tests/test_gtfn_workflow.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/otf_tests/test_gtfn_workflow.py @@ -32,10 +32,8 @@ def test_different_buffer_sizes(): out_nx = 5 out_ny = 5 - inp = gtx.np_as_located_field(IDim, JDim)( - np.reshape(np.arange(nx * ny, dtype=np.int32), (nx, ny)) - ) - out = gtx.np_as_located_field(IDim, JDim)(np.zeros((out_nx, out_ny), dtype=np.int32)) + inp = gtx.as_field([IDim, JDim], np.reshape(np.arange(nx * ny, dtype=np.int32), (nx, ny))) + out = gtx.as_field([IDim, JDim], np.zeros((out_nx, out_ny), dtype=np.int32)) @gtx.field_operator(backend=gtfn.run_gtfn) def copy(inp: gtx.Field[[IDim, JDim], gtx.int32]) -> gtx.Field[[IDim, JDim], gtx.int32]: diff --git a/tests/next_tests/unit_tests/conftest.py b/tests/next_tests/unit_tests/conftest.py index 747431599a..b43eeb3f91 100644 --- a/tests/next_tests/unit_tests/conftest.py +++ b/tests/next_tests/unit_tests/conftest.py @@ -14,16 +14,13 @@ from __future__ import annotations -from dataclasses import dataclass +import dataclasses import pytest import gt4py.next as gtx -from gt4py import eve -from gt4py.next.iterator import ir as itir, pretty_parser, pretty_printer, runtime, transforms +from gt4py.next.iterator import runtime, transforms from gt4py.next.program_processors import processor_interface as ppi -from gt4py.next.program_processors.formatters import gtfn as gtfn_formatters, lisp, type_check -from gt4py.next.program_processors.runners import double_roundtrip, gtfn, roundtrip try: @@ -35,6 +32,7 @@ raise e import next_tests +import next_tests.exclusion_matrices as definitions @pytest.fixture( @@ -49,60 +47,48 @@ def lift_mode(request): return request.param -class 
_RemoveITIRSymTypes(eve.NodeTranslator): - def visit_Sym(self, node: itir.Sym) -> itir.Sym: - return itir.Sym(id=node.id, dtype=None, kind=None) - - -@ppi.program_formatter -def pretty_format_and_check(root: itir.FencilDefinition, *args, **kwargs) -> str: - # remove types from ITIR as they are not supported for the roundtrip - root = _RemoveITIRSymTypes().visit(root) - pretty = pretty_printer.pformat(root) - parsed = pretty_parser.pparse(pretty) - assert parsed == root - return pretty - - OPTIONAL_PROCESSORS = [] if dace_iterator: - OPTIONAL_PROCESSORS.append((dace_iterator.run_dace_iterator, True)) + OPTIONAL_PROCESSORS.append((definitions.OptionalProgramBackendId.DACE_CPU, True)) @pytest.fixture( params=[ - # (processor, do_validate) (None, True), - (lisp.format_lisp, False), - (pretty_format_and_check, False), - (roundtrip.executor, True), - (type_check.check, False), - (double_roundtrip.executor, True), - (gtfn.run_gtfn, True), - (gtfn.run_gtfn_imperative, True), - (gtfn.run_gtfn_with_temporaries, True), - (gtfn_formatters.format_sourcecode, False), + (definitions.ProgramBackendId.ROUNDTRIP, True), + (definitions.ProgramBackendId.DOUBLE_ROUNDTRIP, True), + (definitions.ProgramBackendId.GTFN_CPU, True), + (definitions.ProgramBackendId.GTFN_CPU_IMPERATIVE, True), + (definitions.ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES, True), + (definitions.ProgramFormatterId.LISP_FORMATTER, False), + (definitions.ProgramFormatterId.ITIR_PRETTY_PRINTER, False), + (definitions.ProgramFormatterId.ITIR_TYPE_CHECKER, False), + (definitions.ProgramFormatterId.GTFN_CPP_FORMATTER, False), ] + OPTIONAL_PROCESSORS, - ids=lambda p: next_tests.get_processor_id(p[0]), + ids=lambda p: p[0].short_id() if p[0] is not None else "None", ) -def program_processor(request): +def program_processor(request) -> tuple[ppi.ProgramProcessor, bool]: """ Fixture creating program processors on-demand for tests. Notes: Check ADR 15 for details on the test-exclusion matrices. 
""" - backend, _ = request.param - backend_id = next_tests.get_processor_id(backend) + processor_id, is_backend = request.param + if processor_id is None: + return None, is_backend + + processor = processor_id.load() + assert is_backend == ppi.is_program_backend(processor) for marker, skip_mark, msg in next_tests.exclusion_matrices.BACKEND_SKIP_TEST_MATRIX.get( - backend_id, [] + processor_id, [] ): if request.node.get_closest_marker(marker): - skip_mark(msg.format(marker=marker, backend=backend_id)) + skip_mark(msg.format(marker=marker, backend=processor_id)) - return request.param + return processor, is_backend def run_processor( @@ -119,7 +105,7 @@ def run_processor( raise TypeError(f"program processor kind not recognized: {processor}!") -@dataclass +@dataclasses.dataclass class DummyConnectivity: max_neighbors: int has_skip_values: int diff --git a/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py b/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py index 95093c8307..8a4b4cbd84 100644 --- a/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py +++ b/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py @@ -20,8 +20,8 @@ import numpy as np import pytest -from gt4py.next import Dimension, common -from gt4py.next.common import Domain, UnitRange +from gt4py.next import common, constructors +from gt4py.next.common import Dimension, Domain, UnitRange from gt4py.next.embedded import exceptions as embedded_exceptions, nd_array_field from gt4py.next.embedded.nd_array_field import _get_slices_from_domain_slice from gt4py.next.ffront import fbuiltins diff --git a/tests/next_tests/unit_tests/iterator_tests/test_runtime_domain.py b/tests/next_tests/unit_tests/iterator_tests/test_runtime_domain.py index ed7daa3cff..232995be58 100644 --- a/tests/next_tests/unit_tests/iterator_tests/test_runtime_domain.py +++ b/tests/next_tests/unit_tests/iterator_tests/test_runtime_domain.py @@ -50,14 +50,15 @@ def test_deduce_domain(): def test_embedded_error_on_wrong_domain(): dom = CartesianDomain([("I", range(1))]) - out = gtx.np_as_located_field(I)( + out = gtx.as_field( + [I], np.zeros( 1, - ) + ), ) with pytest.raises(RuntimeError, match="expected `UnstructuredDomain`"): foo[dom]( - gtx.np_as_located_field(I)(np.zeros((1,))), + gtx.as_field([I], np.zeros((1,))), out=out, offset_provider={"bar": connectivity}, ) diff --git a/tests/next_tests/unit_tests/program_processor_tests/codegens_tests/gtfn_tests/test_gtfn_module.py b/tests/next_tests/unit_tests/program_processor_tests/codegens_tests/gtfn_tests/test_gtfn_module.py index ae5f582e47..4e865452f6 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/codegens_tests/gtfn_tests/test_gtfn_module.py +++ b/tests/next_tests/unit_tests/program_processor_tests/codegens_tests/gtfn_tests/test_gtfn_module.py @@ -57,7 +57,7 @@ def fencil_example(): ) IDim = gtx.Dimension("I") params = [ - gtx.np_as_located_field(IDim)(np.empty((1,), dtype=np.float32)), + gtx.as_field([IDim], np.empty((1,), dtype=np.float32)), np.float32(3.14), ] return fencil, params diff --git a/tests/next_tests/unit_tests/program_processor_tests/test_processor_interface.py b/tests/next_tests/unit_tests/program_processor_tests/test_processor_interface.py index 6cd8d43c3b..05e982cf0c 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/test_processor_interface.py +++ b/tests/next_tests/unit_tests/program_processor_tests/test_processor_interface.py @@ -14,15 +14,49 @@ import pytest +import gt4py.next.allocators as next_allocators 
from gt4py.next.iterator import ir as itir from gt4py.next.program_processors.processor_interface import ( + ProgramBackend, ProgramExecutor, ProgramFormatter, + ProgramProcessor, ensure_processor_kind, + is_processor_kind, + is_program_backend, + make_program_processor, program_formatter, ) + +def test_make_program_processor(dummy_formatter): + def my_func(program: itir.FencilDefinition, *args, **kwargs) -> None: + return None + + processor = make_program_processor(my_func, ProgramExecutor) + assert is_processor_kind(processor, ProgramExecutor) + assert processor.__name__ == my_func.__name__ + assert processor(None) == my_func(None) + + def other_func(program: itir.FencilDefinition, *args, **kwargs) -> str: + return f"{args}, {kwargs}" + + processor = make_program_processor( + other_func, ProgramFormatter, name="new_name", accept_args=2, accept_kwargs=["a", "b"] + ) + assert is_processor_kind(processor, ProgramFormatter) + assert processor.__name__ == "new_name" + assert processor(None) == other_func(None) + assert processor(1, 2, a="A", b="B") == other_func(1, 2, a="A", b="B") + assert processor(1, 2, 3, 4, a="A", b="B", c="C") != other_func(1, 2, 3, 4, a="A", b="B", c="C") + + with pytest.raises(ValueError, match="accepted arguments cannot be a negative number"): + make_program_processor(my_func, ProgramFormatter, accept_args=-1) + + with pytest.raises(ValueError, match="invalid list of keyword argument names"): + make_program_processor(my_func, ProgramFormatter, accept_kwargs=["a", None]) + + @pytest.fixture def dummy_formatter(): @program_formatter @@ -47,3 +81,22 @@ def undecorated_formatter(fencil: itir.FencilDefinition, *args, **kwargs) -> str def test_wrong_processor_type_is_caught_at_runtime(dummy_formatter): with pytest.raises(TypeError, match="is not a ProgramExecutor"): ensure_processor_kind(dummy_formatter, ProgramExecutor) + + +def test_is_program_backend(): + class DummyProgramExecutor(ProgramExecutor): + def __call__(self, program: itir.FencilDefinition, *args, **kwargs) -> None: + return None + + assert not is_program_backend(DummyProgramExecutor()) + + class DummyAllocatorFactory: + __gt_allocator__ = next_allocators.StandardCPUFieldBufferAllocator() + + assert not is_program_backend(DummyAllocatorFactory()) + + class DummyBackend(DummyProgramExecutor, DummyAllocatorFactory): + def __call__(self, program: itir.FencilDefinition, *args, **kwargs) -> None: + return None + + assert is_program_backend(DummyBackend()) diff --git a/tests/next_tests/unit_tests/test_allocators.py b/tests/next_tests/unit_tests/test_allocators.py new file mode 100644 index 0000000000..456654c1d0 --- /dev/null +++ b/tests/next_tests/unit_tests/test_allocators.py @@ -0,0 +1,193 @@ +# GT4Py - GridTools Framework +# +# Copyright (c) 2014-2023, ETH Zurich +# All rights reserved. +# +# This file is part of the GT4Py project and the GridTools framework. +# GT4Py is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version. See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check <https://www.gnu.org/licenses/gpl-3.0.html>.
+# +# SPDX-License-Identifier: GPL-3.0-or-later + +from collections.abc import Sequence +from typing import Optional + +import pytest + +import gt4py._core.definitions as core_defs +import gt4py.next.allocators as next_allocators +import gt4py.next.common as common +import gt4py.storage.allocators as core_allocators + + +class DummyAllocator(next_allocators.FieldBufferAllocatorProtocol): + __gt_device_type__ = core_defs.DeviceType.CPU + + def __gt_allocate__( + self, + domain: common.Domain, + dtype: core_defs.DType[core_defs.ScalarT], + device_id: int = 0, + aligned_index: Optional[Sequence[common.NamedIndex]] = None, + ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]: + pass + + +class DummyAllocatorFactory(next_allocators.FieldBufferAllocatorFactoryProtocol): + __gt_allocator__ = DummyAllocator() + + +def test_is_field_allocator(): + # Test with a field allocator + allocator = DummyAllocator() + assert next_allocators.is_field_allocator(allocator) + + # Test with an invalid object + invalid_obj = "not an allocator" + assert not next_allocators.is_field_allocator(invalid_obj) + + +def test_is_field_allocator_for(): + # Test with a valid field allocator for the specified device + assert next_allocators.is_field_allocator_for(DummyAllocator(), core_defs.DeviceType.CPU) + + # Test with a valid field allocator for a different device + assert not next_allocators.is_field_allocator_for(DummyAllocator(), core_defs.DeviceType.CUDA) + + # Test with an invalid field allocator + assert not next_allocators.is_field_allocator_for("not an allocator", core_defs.DeviceType.CPU) + + +def test_is_field_allocator_factory(): + # Test with a field allocator factory + allocator_factory = DummyAllocatorFactory() + assert next_allocators.is_field_allocator_factory(allocator_factory) + + # Test with an invalid object + invalid_obj = "not an allocator" + assert not next_allocators.is_field_allocator_factory(invalid_obj) + + +def test_is_field_allocator_factory_for(): + # Test with a field allocator factory that matches the device type + allocator_factory = DummyAllocatorFactory() + assert next_allocators.is_field_allocator_factory_for( + allocator_factory, core_defs.DeviceType.CPU + ) + + # Test with a field allocator factory that doesn't match the device type + allocator_factory = DummyAllocatorFactory() + assert not next_allocators.is_field_allocator_factory_for( + allocator_factory, core_defs.DeviceType.CUDA + ) + + # Test with an object that is not a field allocator factory + invalid_obj = "not an allocator factory" + assert not next_allocators.is_field_allocator_factory_for(invalid_obj, core_defs.DeviceType.CPU) + + +def test_get_allocator(): + # Test with a field allocator + allocator = DummyAllocator() + assert next_allocators.get_allocator(allocator) == allocator + + # Test with a field allocator factory + allocator_factory = DummyAllocatorFactory() + assert next_allocators.get_allocator(allocator_factory) == allocator_factory.__gt_allocator__ + + # Test with a default allocator + default_allocator = DummyAllocator() + assert next_allocators.get_allocator(None, default=default_allocator) == default_allocator + + # Test with an invalid object and no default allocator + invalid_obj = "not an allocator" + assert next_allocators.get_allocator(invalid_obj) is None + + with pytest.raises( + TypeError, + match=f"Object {invalid_obj} is neither a field allocator nor a field allocator factory", + ): + next_allocators.get_allocator(invalid_obj, strict=True) + + +def 
test_horizontal_first_layout_mapper(): + from gt4py.next.allocators import horizontal_first_layout_mapper + + # Test with only horizontal dimensions + dims = [ + common.Dimension("D0", common.DimensionKind.HORIZONTAL), + common.Dimension("D1", common.DimensionKind.HORIZONTAL), + common.Dimension("D2", common.DimensionKind.HORIZONTAL), + ] + expected_layout_map = core_allocators.BufferLayoutMap((2, 1, 0)) + assert horizontal_first_layout_mapper(dims) == expected_layout_map + + # Test with no horizontal dimensions + dims = [ + common.Dimension("D0", common.DimensionKind.VERTICAL), + common.Dimension("D1", common.DimensionKind.LOCAL), + common.Dimension("D2", common.DimensionKind.VERTICAL), + ] + expected_layout_map = core_allocators.BufferLayoutMap((2, 0, 1)) + assert horizontal_first_layout_mapper(dims) == expected_layout_map + + # Test with a mix of dimensions + dims = [ + common.Dimension("D2", common.DimensionKind.LOCAL), + common.Dimension("D0", common.DimensionKind.HORIZONTAL), + common.Dimension("D1", common.DimensionKind.VERTICAL), + ] + expected_layout_map = core_allocators.BufferLayoutMap((0, 2, 1)) + assert horizontal_first_layout_mapper(dims) == expected_layout_map + + +class TestInvalidFieldBufferAllocator: + def test_allocate(self): + allocator = next_allocators.InvalidFieldBufferAllocator( + core_defs.DeviceType.CPU, ValueError("test error") + ) + I = common.Dimension("I") + J = common.Dimension("J") + domain = common.domain(((I, (2, 4)), (J, (3, 5)))) + dtype = float + with pytest.raises(ValueError, match="test error"): + allocator.__gt_allocate__(domain, dtype) + + +def test_allocate(): + from gt4py.next.allocators import StandardCPUFieldBufferAllocator, allocate + + I = common.Dimension("I") + J = common.Dimension("J") + domain = common.domain(((I, (0, 2)), (J, (0, 3)))) + dtype = core_defs.dtype(float) + + # Test with an explicit field allocator + allocator = StandardCPUFieldBufferAllocator() + tensor_buffer = allocate(domain, dtype, allocator=allocator) + assert tensor_buffer.shape == domain.shape + assert tensor_buffer.dtype == dtype + assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0) + + # Test with a device + device = core_defs.Device(core_defs.DeviceType.CPU, 0) + tensor_buffer = allocate(domain, dtype, device=device) + assert tensor_buffer.shape == domain.shape + assert tensor_buffer.dtype == dtype + assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0) + + # Test with both allocator and device + with pytest.raises(ValueError, match="are incompatible"): + allocate( + domain, + dtype, + allocator=allocator, + device=core_defs.Device(core_defs.DeviceType.CUDA, 0), + ) + + # Test with no device or allocator + with pytest.raises(ValueError, match="No 'device' or 'allocator' specified"): + allocate(domain, dtype) diff --git a/tests/next_tests/unit_tests/test_constructors.py b/tests/next_tests/unit_tests/test_constructors.py new file mode 100644 index 0000000000..e8b070f0c0 --- /dev/null +++ b/tests/next_tests/unit_tests/test_constructors.py @@ -0,0 +1,175 @@ +# GT4Py - GridTools Framework +# +# Copyright (c) 2014-2023, ETH Zurich +# All rights reserved. +# +# This file is part of the GT4Py project and the GridTools framework. +# GT4Py is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or any later +# version.
See the LICENSE.txt file at the top-level directory of this +# distribution for a copy of the license or check <https://www.gnu.org/licenses/gpl-3.0.html>. +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import numpy as np +import pytest + +from gt4py import next as gtx +from gt4py._core import definitions as core_defs +from gt4py.next import allocators as next_allocators, common, float32 +from gt4py.next.program_processors.runners import roundtrip + +from next_tests.integration_tests import cases + + +I = gtx.Dimension("I") +J = gtx.Dimension("J") +K = gtx.Dimension("K") + +sizes = {I: 10, J: 10, K: 10} + + +# TODO: parametrize with gpu backend and compare with cupy array +@pytest.mark.parametrize( + "allocator, device", + [ + [next_allocators.StandardCPUFieldBufferAllocator(), None], + [None, core_defs.Device(core_defs.DeviceType.CPU, 0)], + ], +) +def test_empty(allocator, device): + ref = np.empty([sizes[I], sizes[J]]).astype(gtx.float32) + a = gtx.empty( + domain={I: range(sizes[I]), J: range(sizes[J])}, + dtype=core_defs.dtype(np.float32), + allocator=allocator, + device=device, + ) + assert a.shape == ref.shape + + +# TODO: parametrize with gpu backend and compare with cupy array +@pytest.mark.parametrize( + "allocator, device", + [ + [next_allocators.StandardCPUFieldBufferAllocator(), None], + [None, core_defs.Device(core_defs.DeviceType.CPU, 0)], + ], +) +def test_zeros(allocator, device): + a = gtx.zeros( + common.Domain( + dims=(I, J), ranges=(common.UnitRange(0, sizes[I]), common.UnitRange(0, sizes[J])) + ), + dtype=core_defs.dtype(np.float32), + allocator=allocator, + device=device, + ) + ref = np.zeros((sizes[I], sizes[J])).astype(gtx.float32) + + assert np.array_equal(a.ndarray, ref) + + +# TODO: parametrize with gpu backend and compare with cupy array +@pytest.mark.parametrize( + "allocator, device", + [ + [next_allocators.StandardCPUFieldBufferAllocator(), None], + [None, core_defs.Device(core_defs.DeviceType.CPU, 0)], + ], +) +def test_ones(allocator, device): + a = gtx.ones( + common.Domain(dims=(I, J), ranges=(common.UnitRange(0, 10), common.UnitRange(0, 10))), + dtype=core_defs.dtype(np.float32), + allocator=allocator, + device=device, + ) + ref = np.ones((sizes[I], sizes[J])).astype(gtx.float32) + + assert np.array_equal(a.ndarray, ref) + + +# TODO: parametrize with gpu backend and compare with cupy array +@pytest.mark.parametrize( + "allocator, device", + [ + [next_allocators.StandardCPUFieldBufferAllocator(), None], + [None, core_defs.Device(core_defs.DeviceType.CPU, 0)], + ], +) +def test_full(allocator, device): + a = gtx.full( + domain={I: range(sizes[I] - 2), J: (sizes[J] - 2)}, + fill_value=42.0, + dtype=core_defs.dtype(np.float32), + allocator=allocator, + device=device, + ) + ref = np.full((sizes[I] - 2, sizes[J] - 2), 42.0).astype(gtx.float32) + + assert np.array_equal(a.ndarray, ref) + + +def test_as_field(): + ref = np.random.rand(sizes[I]).astype(gtx.float32) + a = gtx.as_field([I], ref) + assert np.array_equal(a.ndarray, ref) + + +def test_as_field_domain(): + ref = np.random.rand(sizes[I] - 1, sizes[J] - 1).astype(gtx.float32) + domain = common.Domain( + dims=(I, J), + ranges=(common.UnitRange(0, sizes[I] - 1), common.UnitRange(0, sizes[J] - 1)), + ) + a = gtx.as_field(domain, ref) + assert np.array_equal(a.ndarray, ref) + + +def test_as_field_origin(): + data = np.random.rand(sizes[I], sizes[J]).astype(gtx.float32) + a = gtx.as_field([I, J], data, origin={I: 1, J: 2}) + domain_range = [(val.start, val.stop) for val in a.domain.ranges] + assert np.allclose(domain_range, [(-1, 9), (-2, 8)]) +
+ +# check that `as_field()` domain is correct depending on data origin and domain itself +def test_field_wrong_dims(): + with pytest.raises( + ValueError, + match=(r"Cannot construct `Field` from array of shape"), + ): + gtx.as_field([I, J], np.random.rand(sizes[I]).astype(gtx.float32)) + + +def test_field_wrong_domain(): + with pytest.raises( + ValueError, + match=(r"Cannot construct `Field` from array of shape"), + ): + domain = common.Domain( + dims=(I, J), + ranges=(common.UnitRange(0, sizes[I] - 1), common.UnitRange(0, sizes[J] - 1)), + ) + gtx.as_field(domain, np.random.rand(sizes[I], sizes[J]).astype(gtx.float32)) + + +def test_field_wrong_origin(): + with pytest.raises( + ValueError, + match=(r"Origin keys {'J'} not in domain"), + ): + gtx.as_field([I], np.random.rand(sizes[I]).astype(gtx.float32), origin={"J": 0}) + + with pytest.raises( + ValueError, + match=(r"Cannot specify origin for domain I"), + ): + gtx.as_field("I", np.random.rand(sizes[J]).astype(gtx.float32), origin={"J": 0}) + + +@pytest.mark.xfail(reason="aligned_index not supported yet") +def test_aligned_index(): + gtx.as_field([I], np.random.rand(sizes[I]).astype(gtx.float32), aligned_index=[I, 0])
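The test migrations in this patch all follow the same pattern: the deprecated curried `np_as_located_field` call is replaced by the new `as_field` constructor introduced in `gt4py.next.constructors`. A minimal sketch of the old and new spellings, using placeholder dimensions `I` and `J` and illustrative data (not part of the patch itself):

```python
import numpy as np

import gt4py.next as gtx

I = gtx.Dimension("I")
J = gtx.Dimension("J")
data = np.zeros((3, 4), dtype=np.float32)

# Deprecated spelling: dimensions first, buffer in a second call.
#   field = gtx.np_as_located_field(I, J)(data)

# New spelling: dimensions and buffer in one call; `origin` shifts the
# start of the domain (here to -2 in both I and J).
field = gtx.as_field([I, J], data)
shifted = gtx.as_field([I, J], data, origin={I: 2, J: 2})

# Curried form for building several fields with the same dimensions,
# as used for `as_3d_field` in test_tridiag above.
as_2d_field = gtx.as_field.partial([I, J])
a_s = as_2d_field(data)
b_s = as_2d_field(np.ones((3, 4), dtype=np.float32))
```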
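Likewise, `test_is_program_backend` above encodes this PR's notion of a backend: a `ProgramExecutor` that additionally exposes a field buffer allocator via `__gt_allocator__`. A small sketch of that protocol check, mirroring the test code (the `MyExecutor`/`MyBackend` class names are hypothetical):

```python
import gt4py.next.allocators as next_allocators
from gt4py.next.iterator import ir as itir
from gt4py.next.program_processors.processor_interface import (
    ProgramExecutor,
    is_program_backend,
)


class MyExecutor(ProgramExecutor):  # hypothetical executor that does nothing
    def __call__(self, program: itir.FencilDefinition, *args, **kwargs) -> None:
        return None


class MyBackend(MyExecutor):
    # Exposing `__gt_allocator__` is what promotes an executor to a backend.
    __gt_allocator__ = next_allocators.StandardCPUFieldBufferAllocator()


assert not is_program_backend(MyExecutor())  # executes, but cannot allocate
assert is_program_backend(MyBackend())  # executes and allocates
```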
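Finally, the constructors exercised in `test_constructors.py` accept the domain either as a dict mapping dimensions to sizes/ranges or as a `common.Domain`, and take the target storage as either an `allocator` or a `device` (passing an incompatible pair raises, as `test_allocate` shows). A sketch under those assumptions:

```python
import numpy as np

import gt4py.next as gtx
from gt4py._core import definitions as core_defs
from gt4py.next import allocators as next_allocators, common

I = gtx.Dimension("I")
J = gtx.Dimension("J")

# Domain as a `common.Domain`; storage placed via an explicit allocator.
a = gtx.zeros(
    common.Domain(dims=(I, J), ranges=(common.UnitRange(0, 10), common.UnitRange(0, 10))),
    dtype=core_defs.dtype(np.float32),
    allocator=next_allocators.StandardCPUFieldBufferAllocator(),
)

# Domain as a dict; storage placed via a device instead.
b = gtx.full(
    domain={I: range(8), J: range(8)},
    fill_value=42.0,
    dtype=core_defs.dtype(np.float32),
    device=core_defs.Device(core_defs.DeviceType.CPU, 0),
)

assert a.ndarray.shape == (10, 10)
assert float(b.ndarray[0, 0]) == 42.0
```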