From 3e75a3dfbf92af0982e7ece6c1c2faa7747689b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Tue, 11 Feb 2025 15:08:32 +0000 Subject: [PATCH] [AIE2p] Enable register re-allocation --- llvm/lib/Target/AIE/AIE2TargetMachine.cpp | 5 +- llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp | 5 ++ .../Target/AIE/aie2p/AIE2PTargetMachine.cpp | 5 ++ .../CodeGen/AIE/aie2p/llc-pipeline-aie2p.ll | 4 ++ .../CodeGen/AIE/aie2p/ra/waw_reg_renaming.mir | 56 +++++++++++++++++++ .../schedule/postpipeliner/end-to-end.ll | 34 +++++------ 6 files changed, 89 insertions(+), 20 deletions(-) create mode 100644 llvm/test/CodeGen/AIE/aie2p/ra/waw_reg_renaming.mir diff --git a/llvm/lib/Target/AIE/AIE2TargetMachine.cpp b/llvm/lib/Target/AIE/AIE2TargetMachine.cpp index 1ba4a81e394a..e451f3c94e3f 100644 --- a/llvm/lib/Target/AIE/AIE2TargetMachine.cpp +++ b/llvm/lib/Target/AIE/AIE2TargetMachine.cpp @@ -27,10 +27,6 @@ cl::opt EnableSubregRenaming("aie-subreg-renaming", cl::Hidden, cl::init(false), cl::desc("Enable RenameIndependentSubregs pass")); -static cl::opt - EnableWAWRegRewrite("aie-wawreg-rewrite", - cl::desc("Enable the WAW Register Renaming in loops"), - cl::init(true), cl::Hidden); static cl::opt EnableReservedRegsLICM("aie-reserved-regs-licm", cl::Hidden, cl::init(true), cl::desc("Enable LICM for some reserved registers")); @@ -45,6 +41,7 @@ extern cl::opt EnableStagedRA; extern cl::opt EnableSuperRegSplitting; extern cl::opt AllocateMRegsFirst; extern cl::opt EnablePreMISchedCoalescer; +extern cl::opt EnableWAWRegRewrite; extern bool AIEDumpArtifacts; diff --git a/llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp b/llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp index 53888fc9c79f..dbd15a05eeb0 100644 --- a/llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp +++ b/llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp @@ -77,6 +77,11 @@ cl::opt EnableStagedRA("aie-staged-ra", cl::Hidden, cl::init(true), cl::desc("Enable multi-stage register allocation")); +cl::opt + EnableWAWRegRewrite("aie-wawreg-rewrite", + cl::desc("Enable the WAW Register Renaming in loops"), + cl::init(true), cl::Hidden); + cl::opt EnableSuperRegSplitting("aie-split-superregs", cl::Hidden, cl::init(true), cl::desc("Enable splitting super-regs into their " diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp index 7ea7bc8ab9cf..ab0158f21334 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp @@ -22,6 +22,7 @@ extern cl::opt EnableSuperRegSplitting; extern cl::opt AllocateMRegsFirst; extern cl::opt EnablePreMISchedCoalescer; extern cl::opt EnableAddressChaining; +extern cl::opt EnableWAWRegRewrite; void AIE2PTargetMachine::anchor() {} @@ -102,6 +103,10 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() { addPass(createAIESuperRegRewriter()); } addPass(createGreedyRegisterAllocator()); + if (EnableWAWRegRewrite) { + addPass(createAIEWawRegRewriter()); + addPass(createGreedyRegisterAllocator()); + } addPass(createVirtRegRewriter()); return true; diff --git a/llvm/test/CodeGen/AIE/aie2p/llc-pipeline-aie2p.ll b/llvm/test/CodeGen/AIE/aie2p/llc-pipeline-aie2p.ll index 4acdfb6ebdbf..982e6fe361fd 100644 --- a/llvm/test/CodeGen/AIE/aie2p/llc-pipeline-aie2p.ll +++ b/llvm/test/CodeGen/AIE/aie2p/llc-pipeline-aie2p.ll @@ -235,6 +235,8 @@ ; AIE-O1-NEXT: Greedy Register Allocator ; AIE-O1-NEXT: AIE super-reg rewrite ; AIE-O1-NEXT: Greedy Register Allocator +; AIE-O1-NEXT: AIE waw-reg rewrite +; AIE-O1-NEXT: Greedy Register Allocator ; AIE-O1-NEXT: Virtual Register Rewriter ; AIE-O1-NEXT: Stack Slot Coloring ; AIE-O1-NEXT: AIE 1D operands to 2D/3D rewriter @@ -441,6 +443,8 @@ ; AIE-O23-NEXT: Greedy Register Allocator ; AIE-O23-NEXT: AIE super-reg rewrite ; AIE-O23-NEXT: Greedy Register Allocator +; AIE-O23-NEXT: AIE waw-reg rewrite +; AIE-O23-NEXT: Greedy Register Allocator ; AIE-O23-NEXT: Virtual Register Rewriter ; AIE-O23-NEXT: Stack Slot Coloring ; AIE-O23-NEXT: AIE 1D operands to 2D/3D rewriter diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/waw_reg_renaming.mir b/llvm/test/CodeGen/AIE/aie2p/ra/waw_reg_renaming.mir new file mode 100644 index 000000000000..261cd827283c --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/ra/waw_reg_renaming.mir @@ -0,0 +1,56 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# NOTE: Example file for Write After Write Register Renaming in Loop test +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates + +# Basic test for the WAW register renaming pass. Check AIE2 tests for more coverage. + +# RUN: llc -mtriple=aie2p -verify-machineinstrs --start-before=greedy --stop-after=virtregrewriter %s -o - | FileCheck %s + +# Make sure VLD and VMAX define different X registers. +--- +name: simple_waw_replacement +alignment: 16 +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: simple_waw_replacement + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $p1, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: LoopStart $r0, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $d0, $d2, $p0, $p1, $p2, $x0, $d1_3d + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x2, renamable $p0 = VLDA_dmx_lda_x_pstm_nrm_imm killed renamable $p0, 64 + ; CHECK-NEXT: renamable $x4, dead renamable $r16 = VMAX_LT_32_vaddSign1 killed renamable $x2, renamable $x0, implicit $vaddsign1 + ; CHECK-NEXT: renamable $p1 = VST_dmx_sts_x_pstm_nrm_imm killed renamable $x4, killed renamable $p1, 64 + ; CHECK-NEXT: PseudoLoopEnd , %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: PseudoRET implicit $lr + bb.0.entry: + successors: %bb.1 + liveins: $r0, $p0, $p1, $x0 + %0:ep = COPY $p0 + %1:vec512 = COPY $x0 + %2:ep = COPY $p1 + LoopStart $r0, 0 + bb.1: + successors: %bb.1, %bb.2 + liveins: $p0, $p1, $p2, $d0, $d1_3d, $d2 + + %10:vec512, %0:ep = VLDA_dmx_lda_x_pstm_nrm_imm %0, 64 + %11:vec512, %12:mr16_vcompare = VMAX_LT_32_vaddSign1 %10, %1, implicit $vaddsign1 + %2:ep = VST_dmx_sts_x_pstm_nrm_imm %11, %2, 64 + PseudoLoopEnd , %bb.1 + bb.2: + PseudoRET implicit $lr +... diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/end-to-end.ll b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/end-to-end.ll index 8107e2e98e19..d684a5641161 100644 --- a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/end-to-end.ll +++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/end-to-end.ll @@ -20,27 +20,29 @@ define <32 x i16> @zol(i32 %n, ptr %p) { ; CHECK-NEXT: add.nc lc, r0, #-7 ; CHECK-NEXT: movxm ls, #.LBB0_1 ; CHECK-NEXT: movxm le, #.L_LEnd0 -; CHECK-NEXT: nopa ; vldb x2, [p0], #64; nops ; nopxm ; nopv -; CHECK-NEXT: nopa ; vldb x2, [p0], #64; nops ; nopxm ; nopv -; CHECK-NEXT: nopa ; vldb x2, [p0], #64; nops ; nopxm ; nopv -; CHECK-NEXT: nopa ; vldb x2, [p0], #64; nops ; nopxm ; nopv -; CHECK-NEXT: nopa ; vldb x2, [p0], #64; nops ; nopxm ; nopv -; CHECK-NEXT: nopa ; vldb x2, [p0], #64; nops ; nopxm ; nopv -; CHECK-NEXT: nopa ; vldb x2, [p0], #64; nops ; nopxm ; nopv -; CHECK-NEXT: // implicit-def: $x0 +; CHECK-NEXT: nopa ; vldb x0, [p0], #64; nops ; nopxm ; nopv +; CHECK-NEXT: nopa ; vldb x0, [p0], #64; nops ; nopxm ; nopv +; CHECK-NEXT: nopa ; vldb x0, [p0], #64; nops ; nopxm ; nopv +; CHECK-NEXT: nopa ; vldb x0, [p0], #64; nops ; nopxm ; nopv +; CHECK-NEXT: nopa ; vldb x0, [p0], #64; nops ; nopxm ; nopv +; CHECK-NEXT: nopa ; vldb x0, [p0], #64; nops ; nopxm ; nopv +; CHECK-NEXT: nopa ; vldb x0, [p0], #64; nops ; nopxm ; nopv +; CHECK-NEXT: // implicit-def: $x2 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: .L_LEnd0: -; CHECK-NEXT: nopa ; vldb x2, [p0], #64; nops ; nopx ; vadd.16 x0, x2, x0; nopv +; CHECK-NEXT: nopa ; vldb x0, [p0], #64; nops ; nopx ; vadd.16 x2, x0, x2; nopv ; CHECK-NEXT: // %bb.2: // %for.cond.cleanup -; CHECK-NEXT: nopa ; nopx ; vadd.16 x0, x2, x0 -; CHECK-NEXT: vadd.16 x0, x2, x0 -; CHECK-NEXT: vadd.16 x0, x2, x0 -; CHECK-NEXT: vadd.16 x0, x2, x0 -; CHECK-NEXT: vadd.16 x0, x2, x0 -; CHECK-NEXT: vadd.16 x0, x2, x0 -; CHECK-NEXT: vadd.16 x0, x2, x0 +; CHECK-NEXT: vadd.16 x2, x0, x2 +; CHECK-NEXT: vadd.16 x2, x0, x2 +; CHECK-NEXT: vadd.16 x2, x0, x2 +; CHECK-NEXT: vadd.16 x2, x0, x2 +; CHECK-NEXT: vadd.16 x2, x0, x2 +; CHECK-NEXT: vadd.16 x2, x0, x2 +; CHECK-NEXT: vadd.16 x2, x0, x2 +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x0, x2 ; CHECK-NEXT: ret lr ; CHECK-NEXT: nop // Delay Slot 5 ; CHECK-NEXT: nop // Delay Slot 4