From 0d0a59dc3dbfbb920c3040d9e4675cd86e761d1e Mon Sep 17 00:00:00 2001
From: Jason Furmanek
Date: Mon, 21 Apr 2025 17:25:57 +0000
Subject: [PATCH 1/3] gfx950 stuff

---
 build_rocm_python3                             |  4 ++--
 .../optimizers/auto_mixed_precision.cc         |  4 ++--
 .../optimizers/generic_layout_optimizer.cc     |  1 +
 .../Dockerfile.rocm.manylinux2014              |  2 +-
 .../Dockerfile.rocm.manylinux_2_28             |  2 +-
 .../Dockerfile.rocm.ub20                       |  2 +-
 .../Dockerfile.rocm.ub22                       |  2 +-
 .../Dockerfile.rocm.ub24                       |  2 +-
 .../tf_sig_build_dockerfiles/setup.rocm.sh     |  2 +-
 .../gpu/llvm_gpu_backend/amdgpu_backend.cc     |  5 +++--
 .../gpu/transforms/conv_rewriter_test.cc       |  6 ++++++
 .../xla/stream_executor/device_description.h   | 18 +++++++++---------
 12 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/build_rocm_python3 b/build_rocm_python3
index 6f4d977b6b4327..001448531bcf2d 100755
--- a/build_rocm_python3
+++ b/build_rocm_python3
@@ -27,10 +27,10 @@ shift "$((OPTIND-1))"

 # This is not a release branch, so force a nightly build
 # TODO remove this when branching for release
-nightly=true
+#nightly=true

 # First positional argument (if any) specifies the ROCM_INSTALL_DIR
-ROCM_INSTALL_DIR=/opt/rocm/
+ROCM_INSTALL_DIR=/opt/rocm-6.5.0
 if [[ -n $1 ]]; then
     ROCM_INSTALL_DIR=$1
 fi
diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
index 072f9341a04674..69009727f897bc 100644
--- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
+++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
@@ -116,8 +116,8 @@ bool HasFastFP16Support(const DeviceProperties& props) {
 #elif TENSORFLOW_USE_ROCM
   absl::flat_hash_set<std::string> FP16SupportedDevices = {
       {"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"},
-      {"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"},
-      {"gfx1100"}, {"gfx1101"}, {"gfx1102"},
+      {"gfx942"}, {"gfx950"}, {"gfx1010"}, {"gfx1012"},
+      {"gfx1030"}, {"gfx1100"}, {"gfx1101"}, {"gfx1102"},
       {"gfx1200"}, {"gfx1201"}
   };
   std::string gcnArchName = props.environment().at("architecture");
diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc
index bafacd23b9ec05..7d399e158ddf6d 100644
--- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc
@@ -71,6 +71,7 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) {
       if ((compute_capability_it->second == "gfx908" ||
            compute_capability_it->second == "gfx90a" ||
            compute_capability_it->second == "gfx942" ||
+           compute_capability_it->second == "gfx950" ||
            compute_capability_it->second == "gfx1101" ||
            compute_capability_it->second == "gfx1102" ||
            compute_capability_it->second == "gfx1200" ||
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014
index f0767d43421a26..14bae58a64dbba 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014
@@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh
 COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt
 RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt

-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

 # Install ROCM
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
index 4045025afaca6e..a1541d1f15d399 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
@@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh
 COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt
 RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt

-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

 # Install ROCM
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20
index 406a006a32adeb..8c338497819203 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20
@@ -2,7 +2,7 @@
 FROM ubuntu:20.04
 ################################################################################

-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

 # Install build dependencies
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22
index 8c6e0c5cf4863c..7ba564ea8a78f5 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22
@@ -2,7 +2,7 @@
 FROM ubuntu:22.04
 ################################################################################

-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

 # Install build dependencies
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24
index 967da0f0bb7e5e..8cf4005c6eaa57 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24
@@ -2,7 +2,7 @@
 FROM ubuntu:24.04
 ################################################################################

-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

 # Install build dependencies
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh b/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh
index 54b7be29f9a1c4..4545a78a61183a 100755
--- a/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh
@@ -145,7 +145,7 @@ then
     echo "build:rocm_base --copt=-fclang-abi-compat=17" >> /etc/bazel.bazelrc
 fi

-GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx942 gfx1030 gfx1100"}
+GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"}

 echo $ROCM_VERSION
 echo $ROCM_REPO
diff --git a/third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc b/third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc
index 115c156f6be0d8..6e24d46790fcda 100644
--- a/third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc
+++ b/third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc
@@ -339,8 +339,9 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token,
   if (token == "sramecc+") {
     return "+sramecc";
   } else if (token == "sramecc-") {
-    if (gfx == "gfx90a" || gfx == "gfx942" || gfx == "gfx1101" ||
-        gfx == "gfx1102" || gfx == "gfx1200" || gfx == "gfx1201")
+    if (gfx == "gfx90a" || gfx == "gfx942" || gfx == "gfx950" ||
+        gfx == "gfx1101" || gfx == "gfx1102" || gfx == "gfx1200" ||
+        gfx == "gfx1201")
       return "";
     return "-sramecc";
   } else if (token == "xnack+") {
diff --git a/third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc b/third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc
index 062ae3c9958504..70423c5dd1558a 100644
--- a/third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc
+++ b/third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc
@@ -791,6 +791,12 @@ TEST_F(ConvRewriterTest, TestInvalidTypes) {
                      ::testing::HasSubstr(
                          "FP8 convolutions are only supported on CUDA GPUs")));

+  s = ConvRewriter(se::RocmComputeCapability{"gfx950"}).Run(m.get()).status();
+  EXPECT_THAT(s, tsl::testing::StatusIs(
+                     absl::StatusCode::kUnimplemented,
+                     ::testing::HasSubstr(
+                         "FP8 convolutions are only supported on CUDA GPUs")));
+
   // Test unsupported FP8 type
   module_with_type = absl::StrReplaceAll(module_str, {{"TYPE", "f8e4m3fnuz"}});
   TF_ASSERT_OK_AND_ASSIGN(m, ParseAndReturnVerifiedModule(module_with_type));
diff --git a/third_party/xla/xla/stream_executor/device_description.h b/third_party/xla/xla/stream_executor/device_description.h
index 996f1349b540aa..73517d909fe06a 100644
--- a/third_party/xla/xla/stream_executor/device_description.h
+++ b/third_party/xla/xla/stream_executor/device_description.h
@@ -150,15 +150,15 @@ class RocmComputeCapability {
   std::string gcn_arch_name_ = "gfx000";  // default to invalid arch.

   static constexpr absl::string_view kSupportedGfxVersions[]{
-      "gfx900", // MI25
-      "gfx906", // MI50 / MI60
-      "gfx908", // MI100
-      "gfx90a", // MI200
-      "gfx942", // MI300
-      "gfx950", // MI355
-      "gfx1030", // RX68xx / RX69xx
-      "gfx1100", "gfx1101", "gfx1102", // RX7900
-      "gfx1200", "gfx1201", // RX8900
+      "gfx900",                         // MI25
+      "gfx906",                         // MI50 / MI60
+      "gfx908",                         // MI100
+      "gfx90a",                         // MI200
+      "gfx942",                         // MI300
+      "gfx950",                         // MI355
+      "gfx1030",                        // RX68xx / RX69xx
+      "gfx1100", "gfx1101", "gfx1102",  // RX7900
+      "gfx1200", "gfx1201",             // RX8900
   };
 };

From 104018bd9bd738ca097a20f1d3871dfbf6f7442e Mon Sep 17 00:00:00 2001
From: Jason Furmanek
Date: Mon, 21 Apr 2025 19:35:36 +0000
Subject: [PATCH 2/3] Container sudoers update

---
 .../tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
index a1541d1f15d399..f14363ed90279f 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
@@ -68,6 +68,7 @@ ENV TF_NEED_ROCM=1
 ENV TF_ROCM_GCC=1
 ENV ROCM_TOOLKIT_PATH=${ROCM_PATH}

+RUN touch /etc/sudoers.d/sudo-nopasswd
 RUN echo 'ALL ALL=NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd

 # Almalinux 8 does not have the default /usr/bin/python
From 85961bc9c7ab8bb0205f2f5146a97952473a7fbb Mon Sep 17 00:00:00 2001
From: Jason Furmanek
Date: Mon, 21 Apr 2025 19:48:29 +0000
Subject: [PATCH 3/3] revert change

---
 build_rocm_python3 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_rocm_python3 b/build_rocm_python3
index 001448531bcf2d..b7e72ac63bfcd0 100755
--- a/build_rocm_python3
+++ b/build_rocm_python3
@@ -30,7 +30,7 @@ shift "$((OPTIND-1))"
 #nightly=true

 # First positional argument (if any) specifies the ROCM_INSTALL_DIR
-ROCM_INSTALL_DIR=/opt/rocm-6.5.0
+ROCM_INSTALL_DIR=/opt/rocm/
 if [[ -n $1 ]]; then
     ROCM_INSTALL_DIR=$1
 fi