feat[next][dace]: Add gt4py workflow for the DaCe backend (#1477)

edopao · web-flow · commit 628a33bf5e7d · 2024-03-04T10:35:52.000+01:00
Integrate the DaCe backend with the workflow API.
diff --git a/src/gt4py/next/otf/languages.py b/src/gt4py/next/otf/languages.py
@@ -57,6 +57,11 @@ class Python(LanguageTag):
     ...
 
 
+class SDFG(LanguageTag):
+    settings_class = LanguageSettings
+    ...
+
+
 class NanobindSrcL(LanguageTag): ...
 
 
diff --git a/src/gt4py/next/program_processors/runners/dace.py b/src/gt4py/next/program_processors/runners/dace.py
@@ -0,0 +1,88 @@
+# GT4Py - GridTools Framework
+#
+# Copyright (c) 2014-2023, ETH Zurich
+# All rights reserved.
+#
+# This file is part of the GT4Py project and the GridTools framework.
+# GT4Py is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or any later
+# version. See the LICENSE.txt file at the top-level directory of this
+# distribution for a copy of the license or check <https://www.gnu.org/licenses/>.
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import functools
+
+import factory
+
+import gt4py._core.definitions as core_defs
+from gt4py.next import config
+from gt4py.next.otf import recipes, stages
+from gt4py.next.program_processors.runners.dace_iterator.workflow import (
+    DaCeCompilationStepFactory,
+    DaCeTranslationStepFactory,
+    convert_args,
+)
+from gt4py.next.program_processors.runners.gtfn import GTFNBackendFactory
+
+
+def _no_bindings(inp: stages.ProgramSource) -> stages.CompilableSource:
+    return stages.CompilableSource(program_source=inp, binding_source=None)
+
+
+class DaCeWorkflowFactory(factory.Factory):
+    class Meta:
+        model = recipes.OTFCompileWorkflow
+
+    class Params:
+        device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
+        cmake_build_type: config.CMakeBuildType = factory.LazyFunction(
+            lambda: config.CMAKE_BUILD_TYPE
+        )
+        use_field_canonical_representation: bool = False
+
+    translation = factory.SubFactory(
+        DaCeTranslationStepFactory,
+        device_type=factory.SelfAttribute("..device_type"),
+        use_field_canonical_representation=factory.SelfAttribute(
+            "..use_field_canonical_representation"
+        ),
+    )
+    bindings = _no_bindings
+    compilation = factory.SubFactory(
+        DaCeCompilationStepFactory,
+        cache_lifetime=factory.LazyFunction(lambda: config.BUILD_CACHE_LIFETIME),
+        cmake_build_type=factory.SelfAttribute("..cmake_build_type"),
+    )
+    decoration = factory.LazyAttribute(
+        lambda o: functools.partial(
+            convert_args,
+            device=o.device_type,
+            use_field_canonical_representation=o.use_field_canonical_representation,
+        )
+    )
+
+
+class DaCeBackendFactory(GTFNBackendFactory):
+    class Params:
+        otf_workflow = factory.SubFactory(
+            DaCeWorkflowFactory,
+            device_type=factory.SelfAttribute("..device_type"),
+            use_field_canonical_representation=factory.SelfAttribute(
+                "..use_field_canonical_representation"
+            ),
+        )
+        name = factory.LazyAttribute(
+            lambda o: f"run_dace_{o.name_device}{o.name_temps}{o.name_cached}{o.name_postfix}"
+        )
+        auto_optimize = factory.Trait(
+            otf_workflow__translation__auto_optimize=True,
+            name_temps="_opt",
+        )
+        use_field_canonical_representation: bool = False
+
+
+run_dace_cpu = DaCeBackendFactory(cached=True, auto_optimize=True)
+
+run_dace_gpu = DaCeBackendFactory(gpu=True, cached=True, auto_optimize=True)
diff --git a/src/gt4py/next/program_processors/runners/dace_iterator/workflow.py b/src/gt4py/next/program_processors/runners/dace_iterator/workflow.py
@@ -0,0 +1,181 @@
+# GT4Py - GridTools Framework
+#
+# Copyright (c) 2014-2023, ETH Zurich
+# All rights reserved.
+#
+# This file is part of the GT4Py project and the GridTools framework.
+# GT4Py is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or any later
+# version. See the LICENSE.txt file at the top-level directory of this
+# distribution for a copy of the license or check <https://www.gnu.org/licenses/>.
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+from __future__ import annotations
+
+import dataclasses
+from typing import Callable, Optional, cast
+
+import dace
+import factory
+from dace.codegen.compiled_sdfg import CompiledSDFG
+
+from gt4py._core import definitions as core_defs
+from gt4py.next import common, config
+from gt4py.next.common import Dimension
+from gt4py.next.iterator import ir as itir
+from gt4py.next.iterator.transforms import LiftMode
+from gt4py.next.otf import languages, stages, step_types, workflow
+from gt4py.next.otf.binding import interface
+from gt4py.next.otf.compilation import cache
+from gt4py.next.otf.languages import LanguageSettings
+from gt4py.next.type_system import type_translation as tt
+
+from . import build_sdfg_from_itir, get_sdfg_args
+
+
+@dataclasses.dataclass(frozen=True)
+class DaCeTranslator(
+    workflow.ChainableWorkflowMixin[
+        stages.ProgramCall,
+        stages.ProgramSource[languages.SDFG, languages.LanguageSettings],
+    ],
+    step_types.TranslationStep[languages.SDFG, languages.LanguageSettings],
+):
+    auto_optimize: bool = False
+    lift_mode: LiftMode = LiftMode.FORCE_INLINE
+    device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
+    temporary_extraction_heuristics: Optional[
+        Callable[[itir.StencilClosure], Callable[[itir.Expr], bool]]
+    ] = None
+    use_field_canonical_representation: bool = False
+
+    def _language_settings(self) -> languages.LanguageSettings:
+        return languages.LanguageSettings(
+            formatter_key="",
+            formatter_style="",
+            file_extension="sdfg",
+        )
+
+    def __call__(
+        self,
+        inp: stages.ProgramCall,
+    ) -> stages.ProgramSource[languages.SDFG, LanguageSettings]:
+        """Generate DaCe SDFG file from the ITIR definition."""
+        program: itir.FencilDefinition = inp.program
+        on_gpu = True if self.device_type == core_defs.DeviceType.CUDA else False
+
+        # ITIR parameters
+        column_axis: Optional[Dimension] = inp.kwargs.get("column_axis", None)
+        offset_provider = inp.kwargs["offset_provider"]
+
+        sdfg = build_sdfg_from_itir(
+            program,
+            *inp.args,
+            offset_provider=offset_provider,
+            auto_optimize=self.auto_optimize,
+            on_gpu=on_gpu,
+            column_axis=column_axis,
+            lift_mode=self.lift_mode,
+            load_sdfg_from_file=False,
+            save_sdfg=False,
+            use_field_canonical_representation=self.use_field_canonical_representation,
+        )
+
+        arg_types = tuple(
+            interface.Parameter(param, tt.from_value(arg))
+            for param, arg in zip(sdfg.arg_names, inp.args)
+        )
+
+        module: stages.ProgramSource[languages.SDFG, languages.LanguageSettings] = (
+            stages.ProgramSource(
+                entry_point=interface.Function(program.id, arg_types),
+                source_code=sdfg.to_json(),
+                library_deps=tuple(),
+                language=languages.SDFG,
+                language_settings=self._language_settings(),
+            )
+        )
+        return module
+
+
+class DaCeTranslationStepFactory(factory.Factory):
+    class Meta:
+        model = DaCeTranslator
+
+
+@dataclasses.dataclass(frozen=True)
+class DaCeCompiler(
+    workflow.ChainableWorkflowMixin[
+        stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
+        stages.CompiledProgram,
+    ],
+    workflow.ReplaceEnabledWorkflowMixin[
+        stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
+        stages.CompiledProgram,
+    ],
+    step_types.CompilationStep[languages.SDFG, languages.LanguageSettings, languages.Python],
+):
+    """Use the dace build system to compile a GT4Py program to a ``gt4py.next.otf.stages.CompiledProgram``."""
+
+    cache_lifetime: config.BuildCacheLifetime
+    device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
+    cmake_build_type: config.CMakeBuildType = config.CMakeBuildType.DEBUG
+
+    def __call__(
+        self,
+        inp: stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
+    ) -> stages.CompiledProgram:
+        sdfg = dace.SDFG.from_json(inp.program_source.source_code)
+
+        src_dir = cache.get_cache_folder(inp, self.cache_lifetime)
+        sdfg.build_folder = src_dir / ".dacecache"
+
+        with dace.config.temporary_config():
+            dace.config.Config.set("compiler", "build_type", value=self.cmake_build_type.value)
+            if self.device_type == core_defs.DeviceType.CPU:
+                compiler_args = dace.config.Config.get("compiler", "cpu", "args")
+                # disable finite-math-only in order to support isfinite/isinf/isnan builtins
+                if "-ffast-math" in compiler_args:
+                    compiler_args += " -fno-finite-math-only"
+                if "-ffinite-math-only" in compiler_args:
+                    compiler_args.replace("-ffinite-math-only", "")
+
+                dace.config.Config.set("compiler", "cpu", "args", value=compiler_args)
+            sdfg_program = sdfg.compile(validate=False)
+
+        return sdfg_program
+
+
+class DaCeCompilationStepFactory(factory.Factory):
+    class Meta:
+        model = DaCeCompiler
+
+
+def convert_args(
+    inp: stages.CompiledProgram,
+    device: core_defs.DeviceType = core_defs.DeviceType.CPU,
+    use_field_canonical_representation: bool = False,
+) -> stages.CompiledProgram:
+    sdfg_program = cast(CompiledSDFG, inp)
+    on_gpu = True if device == core_defs.DeviceType.CUDA else False
+    sdfg = sdfg_program.sdfg
+
+    def decorated_program(
+        *args, offset_provider: dict[str, common.Connectivity | common.Dimension]
+    ):
+        sdfg_args = get_sdfg_args(
+            sdfg,
+            *args,
+            check_args=False,
+            offset_provider=offset_provider,
+            on_gpu=on_gpu,
+            use_field_canonical_representation=use_field_canonical_representation,
+        )
+
+        with dace.config.temporary_config():
+            dace.config.Config.set("compiler", "allow_view_arguments", value=True)
+            return inp(**sdfg_args)
+
+    return decorated_program
diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py
@@ -83,20 +83,8 @@ class EmbeddedIds(_PythonObjectIdMixin, str, enum.Enum):
 
 
 class OptionalProgramBackendId(_PythonObjectIdMixin, str, enum.Enum):
-    DACE_CPU = "gt4py.next.program_processors.runners.dace_iterator.run_dace_cpu"
-    DACE_GPU = "gt4py.next.program_processors.runners.dace_iterator.run_dace_gpu"
-
-
-class ProgramExecutorId(_PythonObjectIdMixin, str, enum.Enum):
-    GTFN_CPU_EXECUTOR = f"{ProgramBackendId.GTFN_CPU}.executor"
-    GTFN_CPU_IMPERATIVE_EXECUTOR = f"{ProgramBackendId.GTFN_CPU_IMPERATIVE}.executor"
-    GTFN_CPU_WITH_TEMPORARIES = f"{ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES}.executor"
-    ROUNDTRIP = f"{ProgramBackendId.ROUNDTRIP}.executor"
-    DOUBLE_ROUNDTRIP = f"{ProgramBackendId.DOUBLE_ROUNDTRIP}.executor"
-
-
-class OptionalProgramExecutorId(_PythonObjectIdMixin, str, enum.Enum):
-    DACE_CPU_EXECUTOR = f"{OptionalProgramBackendId.DACE_CPU}.executor"
+    DACE_CPU = "gt4py.next.program_processors.runners.dace.run_dace_cpu"
+    DACE_GPU = "gt4py.next.program_processors.runners.dace.run_dace_gpu"
 
 
 class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):