Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat[next] Enable GPU backend tests #1357

Merged
merged 20 commits into from
Nov 20, 2023
Merged
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/gt4py/next/embedded/nd_array_field.py
Original file line number Diff line number Diff line change
@@ -121,7 +121,10 @@ def ndarray(self) -> core_defs.NDArrayObject:
return self._ndarray

def __array__(self, dtype: npt.DTypeLike = None) -> np.ndarray:
return np.asarray(self._ndarray, dtype)
if self.array_ns == cp:
return np.asarray(cp.asnumpy(self._ndarray), dtype)
else:
return np.asarray(self._ndarray, dtype)

@property
def dtype(self) -> core_defs.DType[core_defs.ScalarT]:
59 changes: 31 additions & 28 deletions src/gt4py/next/program_processors/codegens/gtfn/codegen.py
Original file line number Diff line number Diff line change
@@ -179,6 +179,10 @@ def visit_FunCall(self, node: gtfn_ir.FunCall, **kwargs):
"""
)

def visit_FunctionDefinition(self, node: gtfn_ir.FunctionDefinition, **kwargs):
expr_ = "return " + self.visit(node.expr)
return self.generic_visit(node, expr_=expr_)

FunctionDefinition = as_mako(
"""
struct ${id} {
@@ -206,24 +210,6 @@ def visit_FunCall(self, node: gtfn_ir.FunCall, **kwargs):
"""
)

def visit_FunctionDefinition(self, node: gtfn_ir.FunctionDefinition, **kwargs):
expr_ = "return " + self.visit(node.expr)
return self.generic_visit(node, expr_=expr_)

def visit_FencilDefinition(
self, node: gtfn_ir.FencilDefinition, **kwargs: Any
) -> Union[str, Collection[str]]:
self.is_cartesian = node.grid_type == common.GridType.CARTESIAN
self.user_defined_function_ids = list(
str(fundef.id) for fundef in node.function_definitions
)
return self.generic_visit(
node,
grid_type_str=self._grid_type_str[node.grid_type],
block_sizes=self._block_sizes(node.offset_definitions),
**kwargs,
)

def visit_TemporaryAllocation(self, node, **kwargs):
# TODO(tehrengruber): Revisit. We are currently converting an itir.NamedRange with
# start and stop values into a gtfn_ir.(Cartesian|Unstructured)Domain with
@@ -244,6 +230,20 @@ def visit_TemporaryAllocation(self, node, **kwargs):
"auto {id} = gtfn::allocate_global_tmp<{dtype}>(tmp_alloc__, {tmp_sizes});"
)

def visit_FencilDefinition(
self, node: gtfn_ir.FencilDefinition, **kwargs: Any
) -> Union[str, Collection[str]]:
self.is_cartesian = node.grid_type == common.GridType.CARTESIAN
self.user_defined_function_ids = list(
str(fundef.id) for fundef in node.function_definitions
)
return self.generic_visit(
node,
grid_type_str=self._grid_type_str[node.grid_type],
block_sizes=self._block_sizes(node.offset_definitions),
**kwargs,
)

FencilDefinition = as_mako(
"""
#include <cmath>
@@ -277,16 +277,19 @@ def visit_TemporaryAllocation(self, node, **kwargs):
)

def _block_sizes(self, offset_definitions: list[gtfn_ir.TagDefinition]) -> str:
block_dims = []
block_sizes = [32, 8] + [1] * (len(offset_definitions) - 2)
for i, tag in enumerate(offset_definitions):
if tag.alias is None:
block_dims.append(
f"gridtools::meta::list<{tag.name.id}_t, "
f"gridtools::integral_constant<int, {block_sizes[i]}>>"
)
sizes_str = ",\n".join(block_dims)
return f"using block_sizes_t = gridtools::meta::list<{sizes_str}>;"
if self.is_cartesian:
block_dims = []
block_sizes = [32, 8] + [1] * (len(offset_definitions) - 2)
for i, tag in enumerate(offset_definitions):
if tag.alias is None:
block_dims.append(
f"gridtools::meta::list<{tag.name.id}_t, "
f"gridtools::integral_constant<int, {block_sizes[i]}>>"
)
sizes_str = ",\n".join(block_dims)
return f"using block_sizes_t = gridtools::meta::list<{sizes_str}>;"
else:
return "using block_sizes_t = gridtools::meta::list<gridtools::meta::list<gtfn::unstructured::dim::horizontal, gridtools::integral_constant<int, 32>>, gridtools::meta::list<gtfn::unstructured::dim::vertical, gridtools::integral_constant<int, 8>>>;"

@classmethod
def apply(cls, root: Any, **kwargs: Any) -> str:
30 changes: 25 additions & 5 deletions src/gt4py/next/program_processors/runners/gtfn.py
Original file line number Diff line number Diff line change
@@ -12,6 +12,8 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later

import functools
import warnings
from typing import Any

import numpy.typing as npt
@@ -42,12 +44,14 @@ def convert_arg(arg: Any) -> Any:
return arg


def convert_args(inp: stages.CompiledProgram) -> stages.CompiledProgram:
def convert_args(
inp: stages.CompiledProgram, device: core_defs.DeviceType = core_defs.DeviceType.CPU
) -> stages.CompiledProgram:
def decorated_program(
*args, offset_provider: dict[str, common.Connectivity | common.Dimension]
):
converted_args = [convert_arg(arg) for arg in args]
conn_args = extract_connectivity_args(offset_provider)
conn_args = extract_connectivity_args(offset_provider, device)
return inp(
*converted_args,
*conn_args,
@@ -56,8 +60,22 @@ def decorated_program(
return decorated_program


def _ensure_is_on_device(
connectivity_arg: npt.NDArray, device: core_defs.DeviceType
) -> npt.NDArray:
if device == core_defs.DeviceType.CUDA:
import cupy as cp

if not isinstance(connectivity_arg, cp.ndarray):
warnings.warn(
"Copying connectivity to device. For performance make sure connectivity is provided on device."
)
return cp.asarray(connectivity_arg)
return connectivity_arg


def extract_connectivity_args(
offset_provider: dict[str, common.Connectivity | common.Dimension]
offset_provider: dict[str, common.Connectivity | common.Dimension], device: core_defs.DeviceType
) -> list[tuple[npt.NDArray, tuple[int, ...]]]:
# note: the order here needs to agree with the order of the generated bindings
args: list[tuple[npt.NDArray, tuple[int, ...]]] = []
@@ -67,7 +85,9 @@ def extract_connectivity_args(
raise NotImplementedError(
"Only `NeighborTable` connectivities implemented at this point."
)
args.append((conn.table, tuple([0] * 2)))
# copying to device here is a fallback for easy testing and might be removed later
conn_arg = _ensure_is_on_device(conn.table, device)
args.append((conn_arg, tuple([0] * 2)))
elif isinstance(conn, common.Dimension):
pass
else:
@@ -126,7 +146,7 @@ def compilation_hash(otf_closure: stages.ProgramCall) -> int:
translation=GTFN_GPU_TRANSLATION_STEP,
bindings=nanobind.bind_source,
compilation=GTFN_DEFAULT_COMPILE_STEP,
decoration=convert_args,
decoration=functools.partial(convert_args, device=core_defs.DeviceType.CUDA),
)


5 changes: 5 additions & 0 deletions tests/next_tests/exclusion_matrices.py
Original file line number Diff line number Diff line change
@@ -50,6 +50,7 @@ class ProgramBackendId(_PythonObjectIdMixin, str, enum.Enum):
GTFN_CPU_WITH_TEMPORARIES = (
"gt4py.next.program_processors.runners.gtfn.run_gtfn_with_temporaries"
)
GTFN_GPU = "gt4py.next.program_processors.runners.gtfn.run_gtfn_gpu"
ROUNDTRIP = "gt4py.next.program_processors.runners.roundtrip.backend"
DOUBLE_ROUNDTRIP = "gt4py.next.program_processors.runners.double_roundtrip.backend"

@@ -136,6 +137,10 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
+ [
(USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE),
],
ProgramBackendId.GTFN_GPU: GTFN_SKIP_TEST_LIST
+ [
(USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE),
],
ProgramBackendId.GTFN_CPU_IMPERATIVE: GTFN_SKIP_TEST_LIST
+ [
(USES_STRIDED_NEIGHBOR_OFFSET, XFAIL, BINDINGS_UNSUPPORTED_MESSAGE),
15 changes: 13 additions & 2 deletions tests/next_tests/integration_tests/cases.py
Original file line number Diff line number Diff line change
@@ -117,12 +117,19 @@ def from_case(
return self


@dataclasses.dataclass
@dataclasses.dataclass(init=False)
class ConstInitializer(DataInitializer):
"""Initialize with a given value across the coordinate space."""

value: ScalarValue

def __init__(self, value: ScalarValue):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def __init__(self, value: ScalarValue):
def __init__(self, value: core_defs.Scalar):

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And remove `ScalarValue` completely.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... but I kept `ScalarValue` pointing to `core_defs.Scalar`, as this seems to be the pattern that is used in this file.

if hasattr(value, "__array__") or hasattr(value, "__getitem__"):
raise ValueError(
"`ConstInitializer` can not be used with non-scalars. Use `Case.as_field` instead."
)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if hasattr(value, "__array__") or hasattr(value, "__getitem__"):
raise ValueError(
"`ConstInitializer` can not be used with non-scalars. Use `Case.as_field` instead."
)
if not core_defs.is_scalar_type(value):
raise ValueError(
"`ConstInitializer` can not be used with non-scalars. Use `Case.as_field` instead."
)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed that...

self.value = value

@property
def scalar_value(self) -> ScalarValue:
return self.value
@@ -460,7 +467,7 @@ def verify_with_default_data(
``comparison(ref, <out | inout>)`` and should return a boolean.
"""
inps, kwfields = get_default_data(case, fieldop)
ref_args = tuple(i.ndarray if hasattr(i, "ndarray") else i for i in inps)
ref_args = tuple(i.__array__() if common.is_field(i) else i for i in inps)
verify(
case,
fieldop,
@@ -598,3 +605,7 @@ class Case:
offset_provider: dict[str, common.Connectivity | gtx.Dimension]
default_sizes: dict[gtx.Dimension, int]
grid_type: common.GridType

@property
def as_field(self):
return constructors.as_field.partial(allocator=self.backend)
Original file line number Diff line number Diff line change
@@ -53,6 +53,7 @@ def no_backend(program: itir.FencilDefinition, *args: Any, **kwargs: Any) -> Non
definitions.ProgramBackendId.GTFN_CPU,
definitions.ProgramBackendId.GTFN_CPU_IMPERATIVE,
definitions.ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES,
pytest.param(definitions.ProgramBackendId.GTFN_GPU, marks=pytest.mark.requires_gpu),
]
+ OPTIONAL_PROCESSORS,
ids=lambda p: p.short_id() if p is not None else "None",
Original file line number Diff line number Diff line change
@@ -506,7 +506,7 @@ def testee(a: tuple[tuple[cases.IField, cases.IField], cases.IField]) -> cases.I
def test_fieldop_from_scan(cartesian_case, forward):
init = 1.0
expected = np.arange(init + 1.0, init + 1.0 + cartesian_case.default_sizes[IDim], 1)
out = gtx.as_field([KDim], np.zeros((cartesian_case.default_sizes[KDim],)))
out = cartesian_case.as_field([KDim], np.zeros((cartesian_case.default_sizes[KDim],)))

if not forward:
expected = np.flip(expected)
@@ -526,6 +526,7 @@ def simple_scan_operator(carry: float) -> float:
def test_solve_triag(cartesian_case):
if cartesian_case.backend in [
gtfn.run_gtfn,
gtfn.run_gtfn_gpu,
gtfn.run_gtfn_imperative,
gtfn.run_gtfn_with_temporaries,
]:
@@ -637,8 +638,8 @@ def simple_scan_operator(carry: float, a: float) -> float:
return carry if carry > a else carry + 1.0

k_size = cartesian_case.default_sizes[KDim]
a = gtx.as_field([KDim], 4.0 * np.ones((k_size,)))
out = gtx.as_field([KDim], np.zeros((k_size,)))
a = cartesian_case.as_field([KDim], 4.0 * np.ones((k_size,)))
out = cartesian_case.as_field([KDim], np.zeros((k_size,)))

cases.verify(
cartesian_case,
@@ -685,16 +686,19 @@ def testee(out: tuple[cases.KField, tuple[cases.KField, cases.KField]]):
def test_scan_nested_tuple_input(cartesian_case):
init = 1.0
k_size = cartesian_case.default_sizes[KDim]
inp1 = gtx.as_field([KDim], np.ones((k_size,)))
inp2 = gtx.as_field([KDim], np.arange(0.0, k_size, 1))
out = gtx.as_field([KDim], np.zeros((k_size,)))

inp1_np = np.ones((k_size,))
inp2_np = np.arange(0.0, k_size, 1)
inp1 = cartesian_case.as_field([KDim], inp1_np)
inp2 = cartesian_case.as_field([KDim], inp2_np)
out = cartesian_case.as_field([KDim], np.zeros((k_size,)))

def prev_levels_iterator(i):
return range(i + 1)

expected = np.asarray(
[
reduce(lambda prev, i: prev + inp1[i] + inp2[i], prev_levels_iterator(i), init)
reduce(lambda prev, i: prev + inp1_np[i] + inp2_np[i], prev_levels_iterator(i), init)
for i in range(k_size)
]
)
@@ -760,6 +764,7 @@ def program_domain(a: cases.IField, out: cases.IField):
def test_domain_input_bounds(cartesian_case):
if cartesian_case.backend in [
gtfn.run_gtfn,
gtfn.run_gtfn_gpu,
gtfn.run_gtfn_imperative,
gtfn.run_gtfn_with_temporaries,
]:
Original file line number Diff line number Diff line change
@@ -35,7 +35,9 @@ def testee(
inp * ones(V2E), axis=V2EDim
) # multiplication with shifted `ones` because reduction of only non-shifted field with local dimension is not supported

inp = gtx.as_field([Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table)
inp = unstructured_case.as_field(
[Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table
)
ones = cases.allocate(unstructured_case, testee, "ones").strategy(cases.ConstInitializer(1))()

cases.verify(
@@ -56,7 +58,9 @@ def test_external_local_field_only(unstructured_case):
def testee(inp: gtx.Field[[Vertex, V2EDim], int32]) -> gtx.Field[[Vertex], int32]:
return neighbor_sum(inp, axis=V2EDim)

inp = gtx.as_field([Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table)
inp = unstructured_case.as_field(
[Vertex, V2EDim], unstructured_case.offset_provider["V2E"].table
)

cases.verify(
unstructured_case,
Original file line number Diff line number Diff line change
@@ -47,6 +47,7 @@
def test_maxover_execution_(unstructured_case, strategy):
if unstructured_case.backend in [
gtfn.run_gtfn,
gtfn.run_gtfn_gpu,
gtfn.run_gtfn_imperative,
gtfn.run_gtfn_with_temporaries,
]:
@@ -137,10 +138,7 @@ def conditional_nested_tuple(
return where(mask, ((a, b), (b, a)), ((5.0, 7.0), (7.0, 5.0)))

size = cartesian_case.default_sizes[IDim]
bool_field = np.random.choice(a=[False, True], size=(size))
mask = cases.allocate(cartesian_case, conditional_nested_tuple, "mask").strategy(
cases.ConstInitializer(bool_field)
)()
mask = cartesian_case.as_field([IDim], np.random.choice(a=[False, True], size=size))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we build into `cases` that boolean fields are allocated with alternating values by default?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe in a follow-up PR, so that we can get the GPU support in first.

a = cases.allocate(cartesian_case, conditional_nested_tuple, "a")()
b = cases.allocate(cartesian_case, conditional_nested_tuple, "b")()

@@ -210,10 +208,7 @@ def conditional(
return where(mask, a, b)

size = cartesian_case.default_sizes[IDim]
bool_field = np.random.choice(a=[False, True], size=(size))
mask = cases.allocate(cartesian_case, conditional, "mask").strategy(
cases.ConstInitializer(bool_field)
)()
mask = cartesian_case.as_field([IDim], np.random.choice(a=[False, True], size=(size)))
a = cases.allocate(cartesian_case, conditional, "a")()
b = cases.allocate(cartesian_case, conditional, "b")()
out = cases.allocate(cartesian_case, conditional, cases.RETURN)()
@@ -227,10 +222,7 @@ def conditional_promotion(mask: cases.IBoolField, a: cases.IFloatField) -> cases
return where(mask, a, 10.0)

size = cartesian_case.default_sizes[IDim]
bool_field = np.random.choice(a=[False, True], size=(size))
mask = cases.allocate(cartesian_case, conditional_promotion, "mask").strategy(
cases.ConstInitializer(bool_field)
)()
mask = cartesian_case.as_field([IDim], np.random.choice(a=[False, True], size=(size)))
a = cases.allocate(cartesian_case, conditional_promotion, "a")()
out = cases.allocate(cartesian_case, conditional_promotion, cases.RETURN)()

@@ -267,7 +259,7 @@ def conditional_program(
conditional_shifted(mask, a, b, out=out)

size = cartesian_case.default_sizes[IDim] + 1
mask = gtx.as_field([IDim], np.random.choice(a=[False, True], size=(size)))
mask = cartesian_case.as_field([IDim], np.random.choice(a=[False, True], size=(size)))
a = cases.allocate(cartesian_case, conditional_program, "a").extend({IDim: (0, 1)})()
b = cases.allocate(cartesian_case, conditional_program, "b").extend({IDim: (0, 1)})()
out = cases.allocate(cartesian_case, conditional_shifted, cases.RETURN)()
Original file line number Diff line number Diff line change
@@ -122,9 +122,9 @@ def test_math_function_builtins_execution(cartesian_case, builtin_name: str, inp
else:
ref_impl: Callable = getattr(np, builtin_name)

inps = [gtx.as_field([IDim], np.asarray(input)) for input in inputs]
inps = [cartesian_case.as_field([IDim], np.asarray(input)) for input in inputs]
expected = ref_impl(*inputs)
out = gtx.as_field([IDim], np.zeros_like(expected))
out = cartesian_case.as_field([IDim], np.zeros_like(expected))

builtin_field_op = make_builtin_field_operator(builtin_name).with_backend(
cartesian_case.backend
Loading