Small fixes. All Icon4Py tests except for as_offsets pass with gtfn_cpu & temporaries

tehrengruber · tehrengruber · commit 927026288ab4 · 2024-01-26T01:58:52.000+01:00
diff --git a/src/gt4py/next/ffront/foast_to_itir.py b/src/gt4py/next/ffront/foast_to_itir.py
@@ -342,7 +342,17 @@ def visit_BinOp(self, node: foast.BinOp, **kwargs) -> itir.FunCall:
         return self._map(node.op.value, node.left, node.right)
 
     def visit_TernaryExpr(self, node: foast.TernaryExpr, **kwargs) -> itir.FunCall:
-        return im.if_(im.deref(self.visit(node.condition, **kwargs)), self.visit(node.true_expr, **kwargs), self.visit(node.false_expr, **kwargs))
+        op = "if_"
+        args = (node.condition, node.true_expr, node.false_expr)
+        lowered_args = [to_iterator_of_tuples(self.visit(arg, **kwargs), arg.type) for arg in args]
+        if any(type_info.contains_local_field(arg.type) for arg in args):
+            lowered_args = [promote_to_list(arg)(larg) for arg, larg in zip(args, lowered_args)]
+            op = im.call("map_")(op)
+
+        return to_tuples_of_iterator(im.promote_to_lifted_stencil(im.call(op))(*lowered_args), node.type)
+
+        # TODO: iterator of tuples? The previous lowering is kept below for reference:
+        # return im.if_(im.deref(self.visit(node.condition, **kwargs)), self.visit(node.true_expr, **kwargs), self.visit(node.false_expr, **kwargs))
 
     def visit_Compare(self, node: foast.Compare, **kwargs) -> itir.FunCall:
         return self._map(node.op.value, node.left, node.right)
diff --git a/src/gt4py/next/iterator/transforms/inline_lifts.py b/src/gt4py/next/iterator/transforms/inline_lifts.py
@@ -218,6 +218,11 @@ def visit_FunCall(
     ):
         symtable = kwargs["symtable"]
 
+        ignore_recorded_shifts_missing = (kwargs.get("ignore_recorded_shifts_missing", False) or (
+                    hasattr(node.annex, "recorded_shifts") and len(
+                node.annex.recorded_shifts) == 0))
+        kwargs = {**kwargs, "ignore_recorded_shifts_missing": ignore_recorded_shifts_missing}
+
         recorded_shifts_annex = getattr(node.annex, "recorded_shifts", None)
         old_node = node
         node = (
@@ -306,13 +311,13 @@ def visit_FunCall(
             and len(node.args) > 0
             and self.predicate(node, is_scan_pass_context)
         ):
-            if not hasattr(node.annex, "recorded_shifts"):
+            if not ignore_recorded_shifts_missing and not hasattr(node.annex, "recorded_shifts"):
                 breakpoint()
 
             # if the lift is never derefed its params also don't have a recorded_shifts attr and the
             #  following will fail. we don't care about such lifts anyway as they are later on and
             #  disappear
-            if len(node.annex.recorded_shifts) == 0:
+            if ignore_recorded_shifts_missing or len(node.annex.recorded_shifts) == 0:
                 return node
 
             stencil = node.fun.args[0]  # type: ignore[attr-defined] # node already asserted to be of type ir.FunCall
diff --git a/src/gt4py/next/iterator/transforms/pass_manager.py b/src/gt4py/next/iterator/transforms/pass_manager.py
@@ -100,21 +100,22 @@ def visit_StencilClosure(self, node: ir.StencilClosure):
         ValidateRecordedShiftsAnnex().visit(node)
         return self.generic_visit(node)
 
-    def visit_FunCall(self, node: ir.FunCall):
+    def visit_FunCall(self, node: ir.FunCall, **kwargs):
         old_node = node
-        node = self.generic_visit(node)
+        node = self.generic_visit(node,
+                                  ignore_recorded_shifts_missing=(kwargs.get("ignore_recorded_shifts_missing", False) or (hasattr(node.annex, "recorded_shifts") and len(node.annex.recorded_shifts) == 0)))
         #ValidateRecordedShiftsAnnex().visit(node)
         if isinstance(node.fun, ir.Lambda):
             eligible_params = [False] * len(node.fun.params)
 
             # force inline lift args derefed at at most a single position
             new_args = []
             bound_scalars = {}
-            # TODO: what is node.fun is not a lambda? e.g. directly deref?
+            # TODO: what if node.fun is not a lambda? e.g. directly deref?
             for i, (param, arg) in enumerate(zip(node.fun.params, node.args)):
-                if common_pattern_matcher.is_applied_lift(arg) and not hasattr(param.annex, "recorded_shifts"):
+                if not kwargs.get("ignore_recorded_shifts_missing", False) and common_pattern_matcher.is_applied_lift(arg) and not hasattr(param.annex, "recorded_shifts"):
                     breakpoint()
-                if common_pattern_matcher.is_applied_lift(arg) and param.annex.recorded_shifts in [set(), {()}]:
+                if not kwargs.get("ignore_recorded_shifts_missing", False) and common_pattern_matcher.is_applied_lift(arg) and param.annex.recorded_shifts in [set(), {()}]:
                     eligible_params[i] = True
                     global unique_id
                     bound_arg_name = f"__wtf{unique_id}"
@@ -123,6 +124,7 @@ def visit_FunCall(self, node: ir.FunCall):
                     capture_lift.annex.recorded_shifts = param.annex.recorded_shifts
                     new_args.append(capture_lift)
                     bound_scalars[bound_arg_name] = InlineLifts(flags=InlineLifts.Flag.INLINE_TRIVIAL_DEREF_LIFT).visit(im.deref(arg), recurse=False)
+                    ValidateRecordedShiftsAnnex().visit(bound_scalars[bound_arg_name])
                 else:
                     new_args.append(arg)
 
@@ -198,7 +200,7 @@ def apply_common_transforms(
     ] = None,
     symbolic_domain_sizes: Optional[dict[str, str]] = None,
 ):
-    lift_mode = LiftMode.FORCE_TEMPORARIES
+    # lift_mode = LiftMode.FORCE_TEMPORARIES  # hard-coded override, disabled
 
     if lift_mode is None:
         lift_mode = LiftMode.FORCE_INLINE
@@ -231,7 +233,7 @@ def apply_common_transforms(
             inlined = InlineLambdas.apply(
                 inlined,
                 opcount_preserving=True,
-                force_inline_lift_args=True,
+                force_inline_lift_args=True,  # TODO: this is still needed as we cannot extract a lift from a conditional
             )
             if inlined == ir:
                 break
diff --git a/src/gt4py/next/program_processors/codegens/gtfn/codegen.py b/src/gt4py/next/program_processors/codegens/gtfn/codegen.py
@@ -108,7 +108,7 @@ def visit_Literal(self, node: gtfn_ir.Literal, **kwargs: Any) -> str:
             case _:
                 result = node.value
         # TODO: isn't this wrong and int32 should also be casted to int32?
-        if node.type in ["float64", "float32", "int32", "int64"]:
+        if node.type in ["float64", "float32", "int32", "int64", "bool"]:
             result = f"({result})"
         elif node.type == "axis_literal":
             pass
diff --git a/src/gt4py/next/program_processors/runners/gtfn.py b/src/gt4py/next/program_processors/runners/gtfn.py
@@ -214,3 +214,14 @@ def compilation_hash(otf_closure: stages.ProgramCall) -> int:
     executor=gtfn_gpu_cached_executor,
     allocator=next_allocators.StandardGPUFieldBufferAllocator(),
 )
+
+run_gtfn_with_temporaries_cached_executor = otf_compile_executor.CachedOTFCompileExecutor(
+    name="run_gtfn_with_temporaries_cached",
+    otf_workflow=workflow.CachedStep(
+        step=run_gtfn_with_temporaries.executor.otf_workflow, hash_function=compilation_hash
+    ),
+)
+run_gtfn_with_temporaries_cached = otf_compile_executor.OTFBackend(
+    executor=run_gtfn_with_temporaries_cached_executor,
+    allocator=next_allocators.StandardCPUFieldBufferAllocator(),
+)