From e8873ae7dfe68eda04d7656ec93627afe8dc56a1 Mon Sep 17 00:00:00 2001
From: Goran Flegar <gflegar@google.com>
Date: Thu, 18 Jul 2024 19:36:10 +0200
Subject: [PATCH] [BACKEND] Update LLVM version to
 https://github.com/llvm/llvm-project/commit/4e0a0eae58f7a6998866719f7eb970096a2a52e9
 (#4212)

After
https://github.com/llvm/llvm-project/commit/2c1ae801e1b66a09a15028ae4ba614e0911eec00
`EffectInstance` needs an `OpOperand*`, `OpResult` or `BlockArgument` in
its constructor rather than a `Value`, so this updates the calls to those
constructors to match what upstream is doing.

This also adds the workflow fixes from @ptillet to make the LLVM builds
work.
---
 .github/workflows/llvm-build.yml              | 29 +------------
 .../workflows/llvm-build/centos.Dockerfile    | 43 -------------------
 cmake/llvm-hash.txt                           |  2 +-
 lib/Dialect/Triton/IR/Ops.cpp                 |  2 +-
 lib/Dialect/TritonGPU/IR/Dialect.cpp          | 11 ++---
 lib/Dialect/TritonNvidiaGPU/IR/Ops.cpp        | 30 ++++++-------
 test/lib/Instrumentation/GPUHello.cpp         |  1 +
 7 files changed, 26 insertions(+), 92 deletions(-)
 delete mode 100644 .github/workflows/llvm-build/centos.Dockerfile

diff --git a/.github/workflows/llvm-build.yml b/.github/workflows/llvm-build.yml
index a7f8f978331a..9f452ac9c5e1 100644
--- a/.github/workflows/llvm-build.yml
+++ b/.github/workflows/llvm-build.yml
@@ -28,7 +28,6 @@ jobs:
         config:
         - {runner: 'Ubuntu 20.04', runs_on: 'ubuntu-20.04', target-os: 'ubuntu', arch: 'x64'}
         - {runner: 'Ubuntu 20.04 ARM64', runs_on: 'ubuntu-20.04', target-os: 'ubuntu', arch: 'arm64'}
-        - {runner: 'CentOS 7', runs_on: ['self-hosted', 'CPU'], target-os: 'centos', arch: 'x64'}
         - {runner: 'AlmaLinux 8', runs_on: ['self-hosted', 'CPU'], target-os: 'almalinux', arch: 'x64'}
         - {runner: 'MacOS X64', runs_on: 'macos-12', target-os: 'macos', arch: 'x64'}
         - {runner: 'MacOS ARM64', runs_on: 'macos-12', target-os: 'macos', arch: 'arm64'}
@@ -232,30 +231,6 @@ jobs:
 
         tar czf "${{ env.llvm_install_dir }}.tar.gz" "${{ env.llvm_install_dir }}"
 
-
-    - name: Configure, Build, Test, and Install LLVM (CentOS)
-      if: matrix.config.target-os == 'centos'
-      run: |
-        # if this step crashes, it can leave behind a stale docker container
-        docker container prune -f
-        docker rmi -f $(docker images -q)
-
-        docker build --tag llvm-build --build-arg llvm_dir=llvm-project \
-          -f llvm-build/.github/workflows/llvm-build/centos.Dockerfile .
-
-        # Create temporary container to copy cache and installed artifacts.
-        CONTAINER_ID=$(docker create llvm-build)
-        docker cp "${CONTAINER_ID}:/install" "${{ env.llvm_install_dir }}"
-        tar czf "${{ env.llvm_install_dir }}.tar.gz" "${{ env.llvm_install_dir }}"
-
-        # We remove the existing directory, otherwise docker will
-        # create a subdirectory inside the existing directory.
-        rm -rf "${{ env.SCCACHE_DIR }}"
-        docker cp "${CONTAINER_ID}:/sccache" "${{ env.SCCACHE_DIR }}"
-        sudo chown -R "$(id -u -n):$(id -g -n)" "${{ env.SCCACHE_DIR }}"
-
-        docker rm "${CONTAINER_ID}"
-
     - name: Configure, Build, Test, and Install LLVM (AlmaLinux)
       if: matrix.config.target-os == 'almalinux'
       run: |
@@ -297,9 +272,9 @@ jobs:
     - name: Upload LLVM Artifacts to Azure
       if: ${{ (github.repository == 'triton-lang/triton') }}
       run: |
-        az storage blob upload --account-name tritonlang --auth-mode login --container-name llvm-builds --file "${{ env.llvm_install_dir }}.tar.gz" --name "${{ env.llvm_install_dir }}.tar.gz" --overwrite
+        az storage blob upload --account-name oaitriton --auth-mode login --container-name public --file "${{ env.llvm_install_dir }}.tar.gz" --name "llvm-builds/${{ env.llvm_install_dir }}.tar.gz" --overwrite
 
-        URL=$(az storage blob url --account-name tritonlang --auth-mode login --container-name llvm-builds --name "${{ env.llvm_install_dir }}.tar.gz")
+        URL=$(az storage blob url --account-name oaitriton --auth-mode login --container-name public --name "llvm-builds/${{ env.llvm_install_dir }}.tar.gz")
         echo "Blob URL: ${URL}"
 
     - name: Azure Logout
diff --git a/.github/workflows/llvm-build/centos.Dockerfile b/.github/workflows/llvm-build/centos.Dockerfile
deleted file mode 100644
index cd7e852a8b83..000000000000
--- a/.github/workflows/llvm-build/centos.Dockerfile
+++ /dev/null
@@ -1,43 +0,0 @@
-FROM centos:7
-ARG llvm_dir=llvm-project
-# Add the cache artifacts and the LLVM source tree to the container
-ADD sccache /sccache
-ADD "${llvm_dir}" /source/llvm-project
-ENV SCCACHE_DIR="/sccache"
-ENV SCCACHE_CACHE_SIZE="2G"
-
-RUN echo -e "[llvmtoolset-build]\nname=LLVM Toolset 13.0 - Build\nbaseurl=https://buildlogs.centos.org/c7-llvm-toolset-13.0.x86_64/\ngpgcheck=0\nenabled=1" > /etc/yum.repos.d/llvmtoolset-build.repo
-# Install build dependencies
-RUN yum install --assumeyes centos-release-scl
-RUN yum install --assumeyes --nogpgcheck llvm-toolset-13.0
-RUN yum install --assumeyes rh-python38-python-devel rh-python38-python-pip
-SHELL [ "/usr/bin/scl", "enable", "llvm-toolset-13.0", "rh-python38" ]
-
-RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install --upgrade cmake ninja sccache
-
-# Install MLIR's Python Dependencies
-RUN python3 -m pip install -r /source/llvm-project/mlir/python/requirements.txt
-
-# Configure, Build, Test, and Install LLVM
-RUN cmake -GNinja -Bbuild \
-  -DCMAKE_BUILD_TYPE=Release \
-  -DCMAKE_C_COMPILER=clang \
-  -DCMAKE_CXX_COMPILER=clang++ \
-  -DCMAKE_ASM_COMPILER=clang \
-  -DCMAKE_C_COMPILER_LAUNCHER=sccache \
-  -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
-  -DCMAKE_CXX_FLAGS="-Wno-everything" \
-  -DCMAKE_LINKER=lld \
-  -DCMAKE_INSTALL_PREFIX="/install" \
-  -DLLVM_BUILD_UTILS=ON \
-  -DLLVM_BUILD_TOOLS=ON \
-  -DLLVM_ENABLE_ASSERTIONS=ON \
-  -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-  -DLLVM_ENABLE_PROJECTS=mlir \
-  -DLLVM_ENABLE_TERMINFO=OFF \
-  -DLLVM_INSTALL_UTILS=ON \
-  -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" \
-  /source/llvm-project/llvm
-
-RUN ninja -C build install
diff --git a/cmake/llvm-hash.txt b/cmake/llvm-hash.txt
index 07a478bd0575..dd6cf2db26d1 100644
--- a/cmake/llvm-hash.txt
+++ b/cmake/llvm-hash.txt
@@ -1 +1 @@
-657ec7320d8a28171755ba0dd5afc570a5a16791
+4e0a0eae58f7a6998866719f7eb970096a2a52e9
diff --git a/lib/Dialect/Triton/IR/Ops.cpp b/lib/Dialect/Triton/IR/Ops.cpp
index de55697746d6..c05709ede229 100644
--- a/lib/Dialect/Triton/IR/Ops.cpp
+++ b/lib/Dialect/Triton/IR/Ops.cpp
@@ -15,7 +15,7 @@ namespace triton {
 void LoadOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Read::get(), getPtr(),
+  effects.emplace_back(MemoryEffects::Read::get(), &getPtrMutable(),
                        triton::GlobalMemory::get());
   if (getIsVolatile())
     effects.emplace_back(MemoryEffects::Write::get(),
diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dialect.cpp
index ed8011b57327..a5ba60be9002 100644
--- a/lib/Dialect/TritonGPU/IR/Dialect.cpp
+++ b/lib/Dialect/TritonGPU/IR/Dialect.cpp
@@ -2879,7 +2879,8 @@ void LocalAllocOp::getEffects(
   effects.emplace_back(MemoryEffects::Allocate::get(),
                        mlir::triton::gpu::SharedMemory::get());
   if (getSrc())
-    effects.emplace_back(MemoryEffects::Write::get(), getResult(),
+    effects.emplace_back(MemoryEffects::Write::get(),
+                         getOperation()->getOpResult(0),
                          mlir::triton::gpu::SharedMemory::get());
 }
 
@@ -2917,7 +2918,7 @@ LogicalResult LocalAllocOp::verify() {
 void LocalLoadOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Read::get(), getSrc(),
+  effects.emplace_back(MemoryEffects::Read::get(), &getSrcMutable(),
                        mlir::triton::gpu::SharedMemory::get());
 }
 
@@ -2931,7 +2932,7 @@ LogicalResult LocalStoreOp::verify() {
 void LocalStoreOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Write::get(), getDst(),
+  effects.emplace_back(MemoryEffects::Write::get(), &getDstMutable(),
                        mlir::triton::gpu::SharedMemory::get());
 }
 
@@ -2945,9 +2946,9 @@ LogicalResult AsyncCopyGlobalToLocalOp::verify() {
 void AsyncCopyGlobalToLocalOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Read::get(), getSrc(),
+  effects.emplace_back(MemoryEffects::Read::get(), &getSrcMutable(),
                        mlir::triton::GlobalMemory::get());
-  effects.emplace_back(MemoryEffects::Write::get(), getResult(),
+  effects.emplace_back(MemoryEffects::Write::get(), &getResultMutable(),
                        mlir::triton::gpu::SharedMemory::get());
 }
 
diff --git a/lib/Dialect/TritonNvidiaGPU/IR/Ops.cpp b/lib/Dialect/TritonNvidiaGPU/IR/Ops.cpp
index 4d1a752cea12..e1e07809e411 100644
--- a/lib/Dialect/TritonNvidiaGPU/IR/Ops.cpp
+++ b/lib/Dialect/TritonNvidiaGPU/IR/Ops.cpp
@@ -60,13 +60,13 @@ mlir::LogicalResult WarpGroupDotOp::inferReturnTypes(
 void WarpGroupDotOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  auto a = getA();
-  auto b = getB();
-  if (isa<MemDescType>(a.getType()))
-    effects.emplace_back(MemoryEffects::Read::get(), a,
+  auto &a = getAMutable();
+  auto &b = getBMutable();
+  if (isa<MemDescType>(a.get().getType()))
+    effects.emplace_back(MemoryEffects::Read::get(), &a,
                          mlir::triton::gpu::SharedMemory::get());
-  if (isa<MemDescType>(b.getType()))
-    effects.emplace_back(MemoryEffects::Read::get(), b,
+  if (isa<MemDescType>(b.get().getType()))
+    effects.emplace_back(MemoryEffects::Read::get(), &b,
                          mlir::triton::gpu::SharedMemory::get());
 }
 
@@ -99,7 +99,7 @@ LogicalResult InitBarrierOp::verify() {
 void InitBarrierOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Write::get(), getAlloc(),
+  effects.emplace_back(MemoryEffects::Write::get(), &getAllocMutable(),
                        mlir::triton::gpu::SharedMemory::get());
 }
 
@@ -113,7 +113,7 @@ LogicalResult InvalBarrierOp::verify() {
 void InvalBarrierOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Write::get(), getAlloc(),
+  effects.emplace_back(MemoryEffects::Write::get(), &getAllocMutable(),
                        mlir::triton::gpu::SharedMemory::get());
 }
 
@@ -127,7 +127,7 @@ LogicalResult BarrierExpectOp::verify() {
 void BarrierExpectOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Write::get(), getAlloc(),
+  effects.emplace_back(MemoryEffects::Write::get(), &getAllocMutable(),
                        mlir::triton::gpu::SharedMemory::get());
 }
 
@@ -141,7 +141,7 @@ LogicalResult WaitBarrierOp::verify() {
 void WaitBarrierOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Read::get(), getAlloc(),
+  effects.emplace_back(MemoryEffects::Read::get(), &getAllocMutable(),
                        mlir::triton::gpu::SharedMemory::get());
   // Need a side effect to prevent compiler from reordering and removing
   // the wait operation.
@@ -163,11 +163,11 @@ LogicalResult AsyncTMACopyGlobalToLocalOp::verify() {
 void AsyncTMACopyGlobalToLocalOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Read::get(), getDescPtr(),
+  effects.emplace_back(MemoryEffects::Read::get(), &getDescPtrMutable(),
                        mlir::triton::GlobalMemory::get());
-  effects.emplace_back(MemoryEffects::Write::get(), getBarrier(),
+  effects.emplace_back(MemoryEffects::Write::get(), &getBarrierMutable(),
                        mlir::triton::gpu::SharedMemory::get());
-  effects.emplace_back(MemoryEffects::Write::get(), getResult(),
+  effects.emplace_back(MemoryEffects::Write::get(), &getResultMutable(),
                        mlir::triton::gpu::SharedMemory::get());
 }
 
@@ -175,9 +175,9 @@ void AsyncTMACopyGlobalToLocalOp::getEffects(
 void AsyncTMACopyLocalToGlobalOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Write::get(), getDescPtr(),
+  effects.emplace_back(MemoryEffects::Write::get(), &getDescPtrMutable(),
                        mlir::triton::GlobalMemory::get());
-  effects.emplace_back(MemoryEffects::Read::get(), getSrc(),
+  effects.emplace_back(MemoryEffects::Read::get(), &getSrcMutable(),
                        mlir::triton::gpu::SharedMemory::get());
 }
 
diff --git a/test/lib/Instrumentation/GPUHello.cpp b/test/lib/Instrumentation/GPUHello.cpp
index f72d188a78ea..3bee8ce90ced 100644
--- a/test/lib/Instrumentation/GPUHello.cpp
+++ b/test/lib/Instrumentation/GPUHello.cpp
@@ -1,4 +1,5 @@
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Passes/PassBuilder.h"