Skip to content

Commit

Permalink
feat[next] Enable GPU backend tests
Browse files Browse the repository at this point in the history
  • Loading branch information
havogt committed Nov 15, 2023
1 parent 0df592d commit 5c179a1
Show file tree
Hide file tree
Showing 13 changed files with 75 additions and 44 deletions.
5 changes: 4 additions & 1 deletion src/gt4py/next/embedded/nd_array_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,10 @@ def ndarray(self) -> core_defs.NDArrayObject:
return self._ndarray

def __array__(self, dtype: npt.DTypeLike = None) -> np.ndarray:
    """Return the field's underlying buffer as a NumPy array.

    Args:
        dtype: Optional dtype forwarded to ``np.asarray``.

    Returns:
        The result of ``np.asarray`` applied to the (host-side) buffer.
    """
    buffer = self._ndarray
    # A buffer owned by the `cp` namespace is converted with `cp.asnumpy`
    # before it is handed to NumPy. Previously an unconditional
    # `np.asarray(self._ndarray, dtype)` ran first, so this branch could
    # never execute.
    # NOTE(review): `cp` is presumably the CuPy module — confirm how it is
    # bound in this file when CuPy is not installed.
    if self.array_ns == cp:
        buffer = cp.asnumpy(buffer)
    return np.asarray(buffer, dtype)

@property
def dtype(self) -> core_defs.DType[core_defs.ScalarT]:
Expand Down
59 changes: 31 additions & 28 deletions src/gt4py/next/program_processors/codegens/gtfn/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ def visit_FunCall(self, node: gtfn_ir.FunCall, **kwargs):
"""
)

def visit_FunctionDefinition(self, node: gtfn_ir.FunctionDefinition, **kwargs):
    """Visit a function definition, supplying its body as a ``return`` statement.

    The node's ``expr`` is visited first; the result is prefixed with
    ``"return "`` and passed to ``generic_visit`` as the ``expr_`` keyword.
    """
    rendered_expr = self.visit(node.expr)
    return_statement = "return " + rendered_expr
    return self.generic_visit(node, expr_=return_statement)

FunctionDefinition = as_mako(
"""
struct ${id} {
Expand Down Expand Up @@ -206,24 +210,6 @@ def visit_FunCall(self, node: gtfn_ir.FunCall, **kwargs):
"""
)

def visit_FunctionDefinition(self, node: gtfn_ir.FunctionDefinition, **kwargs):
    """Generic-visit ``node`` with ``expr_`` set to ``"return "`` + the visited ``node.expr``."""
    body_expr = self.visit(node.expr)
    extra_fields = {"expr_": "return " + body_expr}
    return self.generic_visit(node, **extra_fields)

def visit_FencilDefinition(
    self, node: gtfn_ir.FencilDefinition, **kwargs: Any
) -> Union[str, Collection[str]]:
    """Visit a fencil definition after recording per-fencil state on the visitor.

    Stores on ``self`` whether the fencil's grid type is Cartesian and the
    stringified ids of its function definitions, then delegates to
    ``generic_visit`` with the grid-type string and block-size declaration
    added as extra keyword arguments.
    """
    self.is_cartesian = node.grid_type == common.GridType.CARTESIAN
    self.user_defined_function_ids = [str(fundef.id) for fundef in node.function_definitions]

    extra_fields = {
        "grid_type_str": self._grid_type_str[node.grid_type],
        "block_sizes": self._block_sizes(node.offset_definitions),
    }
    return self.generic_visit(node, **extra_fields, **kwargs)

def visit_TemporaryAllocation(self, node, **kwargs):
# TODO(tehrengruber): Revisit. We are currently converting an itir.NamedRange with
# start and stop values into an gtfn_ir.(Cartesian|Unstructured)Domain with
Expand All @@ -244,6 +230,20 @@ def visit_TemporaryAllocation(self, node, **kwargs):
"auto {id} = gtfn::allocate_global_tmp<{dtype}>(tmp_alloc__, {tmp_sizes});"
)

def visit_FencilDefinition(
    self, node: gtfn_ir.FencilDefinition, **kwargs: Any
) -> Union[str, Collection[str]]:
    """Visit a fencil definition, recording grid kind and user function ids first.

    Side effects on ``self``:
      * ``is_cartesian`` is set from ``node.grid_type``; it must be assigned
        before ``_block_sizes`` is called below.
      * ``user_defined_function_ids`` becomes the list of stringified ids of
        ``node.function_definitions``.

    The node is then passed to ``generic_visit`` together with the grid-type
    string, the block-size declaration, and any caller-supplied ``kwargs``.
    """
    grid_type = node.grid_type
    self.is_cartesian = grid_type == common.GridType.CARTESIAN

    function_ids = [str(fundef.id) for fundef in node.function_definitions]
    self.user_defined_function_ids = function_ids

    grid_type_str = self._grid_type_str[grid_type]
    block_sizes = self._block_sizes(node.offset_definitions)
    return self.generic_visit(
        node, grid_type_str=grid_type_str, block_sizes=block_sizes, **kwargs
    )

FencilDefinition = as_mako(
"""
#include <cmath>
Expand Down Expand Up @@ -277,16 +277,19 @@ def visit_TemporaryAllocation(self, node, **kwargs):
)

def _block_sizes(self, offset_definitions: list[gtfn_ir.TagDefinition]) -> str:
    """Build the ``block_sizes_t`` type-alias declaration for the generated code.

    Args:
        offset_definitions: Tag definitions of the fencil. In the Cartesian
            case every tag whose ``alias`` is ``None`` contributes one entry.

    Returns:
        A ``using block_sizes_t = gridtools::meta::list<...>;`` declaration.

    ``self.is_cartesian`` must already be set when this is called.
    """
    if not self.is_cartesian:
        # Unstructured grids use a fixed horizontal/vertical blocking,
        # independent of the offset definitions. The pieces below concatenate
        # to exactly the single-line literal used before.
        return (
            "using block_sizes_t = gridtools::meta::list<"
            "gridtools::meta::list<gtfn::unstructured::dim::horizontal, "
            "gridtools::integral_constant<int, 32>>, "
            "gridtools::meta::list<gtfn::unstructured::dim::vertical, "
            "gridtools::integral_constant<int, 8>>>;"
        )

    # Sizes are assigned by position in `offset_definitions`: 32 for the first
    # tag, 8 for the second, 1 for every further one. Aliased tags consume a
    # position but emit no entry.
    block_sizes = [32, 8] + [1] * (len(offset_definitions) - 2)
    block_dims = [
        f"gridtools::meta::list<{tag.name.id}_t, "
        f"gridtools::integral_constant<int, {size}>>"
        for tag, size in zip(offset_definitions, block_sizes)
        if tag.alias is None
    ]
    sizes_str = ",\n".join(block_dims)
    return f"using block_sizes_t = gridtools::meta::list<{sizes_str}>;"

@classmethod
def apply(cls, root: Any, **kwargs: Any) -> str:
Expand Down
5 changes: 4 additions & 1 deletion src/gt4py/next/program_processors/runners/gtfn.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from typing import Any

import cupy as cp # TODO
import numpy.typing as npt

import gt4py._core.definitions as core_defs
Expand Down Expand Up @@ -67,7 +68,9 @@ def extract_connectivity_args(
raise NotImplementedError(
"Only `NeighborTable` connectivities implemented at this point."
)
args.append((conn.table, tuple([0] * 2)))
args.append(
(cp.asarray(conn.table), tuple([0] * 2))
) # TODO where do we do the host<->device of neighbortables
elif isinstance(conn, common.Dimension):
pass
else:
Expand Down
5 changes: 5 additions & 0 deletions tests/next_tests/exclusion_matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class ProgramBackendId(_PythonObjectIdMixin, str, enum.Enum):
GTFN_CPU_WITH_TEMPORARIES = (
"gt4py.next.program_processors.runners.gtfn.run_gtfn_with_temporaries"
)
GTFN_GPU = "gt4py.next.program_processors.runners.gtfn.run_gtfn_gpu"
ROUNDTRIP = "gt4py.next.program_processors.runners.roundtrip.backend"
DOUBLE_ROUNDTRIP = "gt4py.next.program_processors.runners.double_roundtrip.backend"

Expand Down Expand Up @@ -136,6 +137,10 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
+ [
(USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE),
],
ProgramBackendId.GTFN_GPU: GTFN_SKIP_TEST_LIST
+ [
(USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE),
],
ProgramBackendId.GTFN_CPU_IMPERATIVE: GTFN_SKIP_TEST_LIST
+ [
(USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE),
Expand Down
6 changes: 5 additions & 1 deletion tests/next_tests/integration_tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def verify_with_default_data(
``comparison(ref, <out | inout>)`` and should return a boolean.
"""
inps, kwfields = get_default_data(case, fieldop)
ref_args = tuple(i.ndarray if hasattr(i, "ndarray") else i for i in inps)
ref_args = tuple(i.__array__() if common.is_field(i) else i for i in inps)
verify(
case,
fieldop,
Expand Down Expand Up @@ -598,3 +598,7 @@ class Case:
offset_provider: dict[str, common.Connectivity | gtx.Dimension]
default_sizes: dict[gtx.Dimension, int]
grid_type: common.GridType

@property
def as_field(self):
    """Return ``constructors.as_field.partial(...)`` with ``allocator`` set to this case's backend."""
    field_constructor = constructors.as_field
    return field_constructor.partial(allocator=self.backend)
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def no_backend(program: itir.FencilDefinition, *args: Any, **kwargs: Any) -> Non
definitions.ProgramBackendId.GTFN_CPU,
definitions.ProgramBackendId.GTFN_CPU_IMPERATIVE,
definitions.ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES,
pytest.param(definitions.ProgramBackendId.GTFN_GPU, marks=pytest.mark.requires_gpu),
]
+ OPTIONAL_PROCESSORS,
ids=lambda p: p.short_id() if p is not None else "None",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def testee(a: tuple[tuple[cases.IField, cases.IField], cases.IField]) -> cases.I
def test_fieldop_from_scan(cartesian_case, forward):
init = 1.0
expected = np.arange(init + 1.0, init + 1.0 + cartesian_case.default_sizes[IDim], 1)
out = gtx.as_field([KDim], np.zeros((cartesian_case.default_sizes[KDim],)))
out = cartesian_case.as_field([KDim], np.zeros((cartesian_case.default_sizes[KDim],)))

if not forward:
expected = np.flip(expected)
Expand All @@ -526,6 +526,7 @@ def simple_scan_operator(carry: float) -> float:
def test_solve_triag(cartesian_case):
if cartesian_case.backend in [
gtfn.run_gtfn,
gtfn.run_gtfn_gpu,
gtfn.run_gtfn_imperative,
gtfn.run_gtfn_with_temporaries,
]:
Expand Down Expand Up @@ -637,8 +638,8 @@ def simple_scan_operator(carry: float, a: float) -> float:
return carry if carry > a else carry + 1.0

k_size = cartesian_case.default_sizes[KDim]
a = gtx.as_field([KDim], 4.0 * np.ones((k_size,)))
out = gtx.as_field([KDim], np.zeros((k_size,)))
a = cartesian_case.as_field([KDim], 4.0 * np.ones((k_size,)))
out = cartesian_case.as_field([KDim], np.zeros((k_size,)))

cases.verify(
cartesian_case,
Expand Down Expand Up @@ -685,16 +686,19 @@ def testee(out: tuple[cases.KField, tuple[cases.KField, cases.KField]]):
def test_scan_nested_tuple_input(cartesian_case):
init = 1.0
k_size = cartesian_case.default_sizes[KDim]
inp1 = gtx.as_field([KDim], np.ones((k_size,)))
inp2 = gtx.as_field([KDim], np.arange(0.0, k_size, 1))
out = gtx.as_field([KDim], np.zeros((k_size,)))

inp1_np = np.ones((k_size,))
inp2_np = np.arange(0.0, k_size, 1)
inp1 = cartesian_case.as_field([KDim], inp1_np)
inp2 = cartesian_case.as_field([KDim], inp2_np)
out = cartesian_case.as_field([KDim], np.zeros((k_size,)))

def prev_levels_iterator(i):
return range(i + 1)

expected = np.asarray(
[
reduce(lambda prev, i: prev + inp1[i] + inp2[i], prev_levels_iterator(i), init)
reduce(lambda prev, i: prev + inp1_np[i] + inp2_np[i], prev_levels_iterator(i), init)
for i in range(k_size)
]
)
Expand Down Expand Up @@ -760,6 +764,7 @@ def program_domain(a: cases.IField, out: cases.IField):
def test_domain_input_bounds(cartesian_case):
if cartesian_case.backend in [
gtfn.run_gtfn,
gtfn.run_gtfn_gpu,
gtfn.run_gtfn_imperative,
gtfn.run_gtfn_with_temporaries,
]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def testee(
inp * ones(V2E), axis=V2EDim
) # multiplication with shifted `ones` because reduction of only non-shifted field with local dimension is not supported

inp = gtx.as_field([Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table)
inp = unstructured_case.as_field(
[Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table
)
ones = cases.allocate(unstructured_case, testee, "ones").strategy(cases.ConstInitializer(1))()

cases.verify(
Expand All @@ -56,7 +58,9 @@ def test_external_local_field_only(unstructured_case):
def testee(inp: gtx.Field[[Vertex, V2EDim], int32]) -> gtx.Field[[Vertex], int32]:
return neighbor_sum(inp, axis=V2EDim)

inp = gtx.as_field([Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table)
inp = unstructured_case.as_field(
[Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table
)

cases.verify(
unstructured_case,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
def test_maxover_execution_(unstructured_case, strategy):
if unstructured_case.backend in [
gtfn.run_gtfn,
gtfn.run_gtfn_gpu,
gtfn.run_gtfn_imperative,
gtfn.run_gtfn_with_temporaries,
]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ def test_math_function_builtins_execution(cartesian_case, builtin_name: str, inp
else:
ref_impl: Callable = getattr(np, builtin_name)

inps = [gtx.as_field([IDim], np.asarray(input)) for input in inputs]
inps = [cartesian_case.as_field([IDim], np.asarray(input)) for input in inputs]
expected = ref_impl(*inputs)
out = gtx.as_field([IDim], np.zeros_like(expected))
out = cartesian_case.as_field([IDim], np.zeros_like(expected))

builtin_field_op = make_builtin_field_operator(builtin_name).with_backend(
cartesian_case.backend
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ class setup:
def test_solve_nonhydro_stencil_52_like_z_q(test_setup, fieldview_backend):
if fieldview_backend in [
gtfn.run_gtfn,
gtfn.run_gtfn_gpu,
gtfn.run_gtfn_imperative,
gtfn.run_gtfn_with_temporaries,
]:
Expand Down
1 change: 1 addition & 0 deletions tests/next_tests/unit_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def lift_mode(request):
(definitions.ProgramBackendId.GTFN_CPU, True),
(definitions.ProgramBackendId.GTFN_CPU_IMPERATIVE, True),
(definitions.ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES, True),
pytest.param((definitions.ProgramBackendId.GTFN_GPU, True), marks=pytest.mark.requires_gpu),
(definitions.ProgramFormatterId.LISP_FORMATTER, False),
(definitions.ProgramFormatterId.ITIR_PRETTY_PRINTER, False),
(definitions.ProgramFormatterId.ITIR_TYPE_CHECKER, False),
Expand Down
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ set_env =
PIP_EXTRA_INDEX_URL = {env:PIP_EXTRA_INDEX_URL:https://test.pypi.org/simple/}
commands =
nomesh-cpu: python -m pytest --cache-clear -v -n {env:NUM_PROCESSES:1} -m "not requires_atlas and not requires_gpu" {posargs} tests{/}next_tests
nomesh-gpu: python -m pytest --cache-clear -v -n {env:NUM_PROCESSES:1} -m "not requires_atlas and requires_gpu" {posargs} tests{/}next_tests
nomesh-{cuda,cuda11x,cuda12x}: python -m pytest --cache-clear -v -n {env:NUM_PROCESSES:1} -m "not requires_atlas and requires_gpu" {posargs} tests{/}next_tests
atlas-cpu: python -m pytest --cache-clear -v -n {env:NUM_PROCESSES:1} -m "requires_atlas and not requires_gpu" {posargs} tests{/}next_tests
atlas-gpu: python -m pytest --cache-clear -v -n {env:NUM_PROCESSES:1} -m "requires_atlas and requires_gpu" {posargs} tests{/}next_tests
atlas-{cuda,cuda11x,cuda12x}: python -m pytest --cache-clear -v -n {env:NUM_PROCESSES:1} -m "requires_atlas and requires_gpu" {posargs} tests{/}next_tests
pytest --doctest-modules src{/}gt4py{/}next

[testenv:storage-py{38,39,310}-{internal,dace}-{cpu,cuda,cuda11x,cuda12x}]
Expand Down

0 comments on commit 5c179a1

Please sign in to comment.