Skip to content

Commit

Permalink
[AArch64][SME] Disable inlining of callees with new ZT0 state (llvm#121338)
Browse files Browse the repository at this point in the history

Inlining must be disabled for callees with new ZT0 state (`__arm_new("zt0")`),
because such a callee is required to save ZT0 and toggle PSTATE.ZA on entry.
  • Loading branch information
kmclaughlin-arm authored Jan 6, 2025
1 parent 648e256 commit d8d4c18
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 13 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/DiagnosticFrontendKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ def warn_function_always_inline_attribute_mismatch : Warning<
"inlining may change runtime behaviour">, InGroup<AArch64SMEAttributes>;
// Reported at the call site when the callee requires new ZA state; %0 is the
// callee's declared name.
def err_function_always_inline_new_za : Error<
"always_inline function %0 has new za state">;
// ZT0 counterpart of the diagnostic above: reported at the call site when the
// callee requires new ZT0 state; %0 is the callee's declared name.
def err_function_always_inline_new_zt0
: Error<"always_inline function %0 has new zt0 state">;

def warn_avx_calling_convention
: Warning<"AVX vector %select{return|argument}0 of type %1 without '%2' "
Expand Down
15 changes: 12 additions & 3 deletions clang/lib/CodeGen/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1169,8 +1169,9 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
enum class ArmSMEInlinability : uint8_t {
Ok = 0,
ErrorCalleeRequiresNewZA = 1 << 0,
WarnIncompatibleStreamingModes = 1 << 1,
ErrorIncompatibleStreamingModes = 1 << 2,
ErrorCalleeRequiresNewZT0 = 1 << 1,
WarnIncompatibleStreamingModes = 1 << 2,
ErrorIncompatibleStreamingModes = 1 << 3,

IncompatibleStreamingModes =
WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
Expand Down Expand Up @@ -1198,9 +1199,12 @@ static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
else
Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
}
if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
if (NewAttr->isNewZA())
Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
if (NewAttr->isNewZT0())
Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0;
}

return Inlinability;
}
Expand All @@ -1227,6 +1231,11 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
ArmSMEInlinability::ErrorCalleeRequiresNewZA)
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
<< Callee->getDeclName();

if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) ==
ArmSMEInlinability::ErrorCalleeRequiresNewZT0)
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_zt0)
<< Callee->getDeclName();
}

// If the target does not have floating-point registers, but we are using a
Expand Down
13 changes: 11 additions & 2 deletions clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_FLATTEN -o - | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_FLATTEN -o - | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s

// REQUIRES: aarch64-registered-target

Expand All @@ -20,6 +20,7 @@ void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
// Callees with different SME attributes. Each body only calls was_inlined(),
// so a callee that has been inlined shows up in the caller's IR as a direct
// call to was_inlined, while a callee that was not inlined shows up as a call
// to the callee itself.
void fn_streaming(void) __arm_streaming { was_inlined(); }
__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
// Callees that create new ZA / ZT0 state; the callers in this file are checked
// to still contain calls to them.
__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
__arm_new("zt0") void fn_streaming_new_zt0(void) __arm_streaming { was_inlined(); }

FN_ATTR
void caller(void) {
Expand All @@ -28,6 +29,7 @@ void caller(void) {
STMT_ATTR fn_streaming();
STMT_ATTR fn_locally_streaming();
STMT_ATTR fn_streaming_new_za();
STMT_ATTR fn_streaming_new_zt0();
}
// CHECK-LABEL: void @caller()
// CHECK-NEXT: entry:
Expand All @@ -36,13 +38,15 @@ void caller(void) {
// CHECK-NEXT: call void @fn_streaming
// CHECK-NEXT: call void @fn_locally_streaming
// CHECK-NEXT: call void @fn_streaming_new_za
// CHECK-NEXT: call void @fn_streaming_new_zt0

// Streaming-compatible caller that invokes every callee variant in turn.
// FN_ATTR and STMT_ATTR are macros defined earlier in the file (not shown
// here); the RUN lines select them with -DUSE_FLATTEN or
// -DUSE_ALWAYS_INLINE_STMT. The check lines after this function require the
// calls to fn_streaming, fn_locally_streaming, fn_streaming_new_za and
// fn_streaming_new_zt0 to remain as calls in the emitted IR.
FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible {
STMT_ATTR fn();
STMT_ATTR fn_streaming_compatible();
STMT_ATTR fn_streaming();
STMT_ATTR fn_locally_streaming();
STMT_ATTR fn_streaming_new_za();
STMT_ATTR fn_streaming_new_zt0();
}
// CHECK-LABEL: void @caller_streaming_compatible()
// CHECK-NEXT: entry:
Expand All @@ -51,13 +55,15 @@ FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible {
// CHECK-NEXT: call void @fn_streaming
// CHECK-NEXT: call void @fn_locally_streaming
// CHECK-NEXT: call void @fn_streaming_new_za
// CHECK-NEXT: call void @fn_streaming_new_zt0

// Streaming caller that invokes every callee variant in turn. The check lines
// after this function require the calls to fn_streaming_new_za and
// fn_streaming_new_zt0 to remain as calls in the emitted IR, while the two
// preceding callees appear as direct calls to was_inlined.
FN_ATTR void caller_streaming(void) __arm_streaming {
STMT_ATTR fn();
STMT_ATTR fn_streaming_compatible();
STMT_ATTR fn_streaming();
STMT_ATTR fn_locally_streaming();
STMT_ATTR fn_streaming_new_za();
STMT_ATTR fn_streaming_new_zt0();
}
// CHECK-LABEL: void @caller_streaming()
// CHECK-NEXT: entry:
Expand All @@ -66,6 +72,7 @@ FN_ATTR void caller_streaming(void) __arm_streaming {
// CHECK-NEXT: call void @was_inlined
// CHECK-NEXT: call void @was_inlined
// CHECK-NEXT: call void @fn_streaming_new_za
// CHECK-NEXT: call void @fn_streaming_new_zt0

FN_ATTR __arm_locally_streaming
void caller_locally_streaming(void) {
Expand All @@ -74,6 +81,7 @@ void caller_locally_streaming(void) {
STMT_ATTR fn_streaming();
STMT_ATTR fn_locally_streaming();
STMT_ATTR fn_streaming_new_za();
STMT_ATTR fn_streaming_new_zt0();
}
// CHECK-LABEL: void @caller_locally_streaming()
// CHECK-NEXT: entry:
Expand All @@ -82,3 +90,4 @@ void caller_locally_streaming(void) {
// CHECK-NEXT: call void @was_inlined
// CHECK-NEXT: call void @was_inlined
// CHECK-NEXT: call void @fn_streaming_new_za
// CHECK-NEXT: call void @fn_streaming_new_zt0
12 changes: 8 additions & 4 deletions clang/test/CodeGen/AArch64/sme-inline-streaming-attrs.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_NONE %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_COMPATIBLE %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_STREAMING %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_LOCALLY %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_NONE %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_COMPATIBLE %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_STREAMING %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_LOCALLY %s

// REQUIRES: aarch64-registered-target

Expand All @@ -10,13 +10,17 @@ __ai void inlined_fn(void) {}
// Empty callees, one per SME attribute combination under test. __ai is a macro
// defined earlier in the file (not shown here); presumably it marks the
// function always_inline, given the wording of the diagnostics the callers
// require -- confirm against the macro definition.
__ai void inlined_fn_streaming_compatible(void) __arm_streaming_compatible {}
__ai void inlined_fn_streaming(void) __arm_streaming {}
__ai __arm_locally_streaming void inlined_fn_local(void) {}
// Callees that create new ZA / ZT0 state.
__ai __arm_new("za") void inlined_fn_za(void) {}
__ai __arm_new("zt0") void inlined_fn_zt0(void) {}

#ifdef TEST_NONE
// TEST_NONE configuration: the caller carries no SME attributes. The first two
// calls must compile without diagnostics; the streaming and locally-streaming
// callees must be rejected for mismatching streaming attributes, and the
// new-ZA / new-ZT0 callees must be rejected for having new state.
void caller(void) {
inlined_fn();
inlined_fn_streaming_compatible();
inlined_fn_streaming(); // expected-error {{always_inline function 'inlined_fn_streaming' and its caller 'caller' have mismatching streaming attributes}}
inlined_fn_local(); // expected-error {{always_inline function 'inlined_fn_local' and its caller 'caller' have mismatching streaming attributes}}
inlined_fn_za(); // expected-error {{always_inline function 'inlined_fn_za' has new za state}}
inlined_fn_zt0(); // expected-error {{always_inline function 'inlined_fn_zt0' has new zt0 state}}
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
CalleeAttrs.set(SMEAttrs::SM_Enabled, true);
}

if (CalleeAttrs.isNewZA())
if (CalleeAttrs.isNewZA() || CalleeAttrs.isNewZT0())
return false;

if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
Expand Down
30 changes: 27 additions & 3 deletions llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -391,9 +391,33 @@ define void @nonzt0_callee() {
ret void
}

; Callee carrying the "aarch64_new_zt0" attribute. Its body is an inline-asm
; marker followed by a call to @inlined_body, so an inlined copy would be
; recognisable in a caller. Used by
; @nonzt0_caller_new_zt0_callee_dont_inline.
define void @new_zt0_callee() "aarch64_new_zt0" {
; CHECK-LABEL: define void @new_zt0_callee
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @inlined_body()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; inlineasm", ""()
call void @inlined_body()
ret void
}

; Caller without any ZT0 attribute calling @new_zt0_callee. The check lines
; require the call to @new_zt0_callee to still be present after the pass
; pipeline, i.e. the callee's body must not have been inlined here.
define void @nonzt0_caller_new_zt0_callee_dont_inline() {
; CHECK-LABEL: define void @nonzt0_caller_new_zt0_callee_dont_inline
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @new_zt0_callee()
; CHECK-NEXT: ret void
;
entry:
call void @new_zt0_callee()
ret void
}

define void @shared_zt0_caller_nonzt0_callee_dont_inline() "aarch64_inout_zt0" {
; CHECK-LABEL: define void @shared_zt0_caller_nonzt0_callee_dont_inline
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: call void @nonzt0_callee()
; CHECK-NEXT: ret void
;
Expand All @@ -403,7 +427,7 @@ define void @shared_zt0_caller_nonzt0_callee_dont_inline() "aarch64_inout_zt0" {

define void @shared_zt0_callee() "aarch64_inout_zt0" {
; CHECK-LABEL: define void @shared_zt0_callee
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @inlined_body()
; CHECK-NEXT: ret void
Expand All @@ -415,7 +439,7 @@ define void @shared_zt0_callee() "aarch64_inout_zt0" {

define void @shared_zt0_caller_shared_zt0_callee_inline() "aarch64_inout_zt0" {
; CHECK-LABEL: define void @shared_zt0_caller_shared_zt0_callee_inline
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @inlined_body()
; CHECK-NEXT: ret void
Expand Down

0 comments on commit d8d4c18

Please sign in to comment.