Skip to content

Commit ed6bf74

Browse files
committed
Added support for the gfx950 GPU architecture
1 parent 6304124 commit ed6bf74

File tree

12 files changed

+24
-13
lines changed

12 files changed

+24
-13
lines changed

tensorflow/core/grappler/optimizers/auto_mixed_precision.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ bool HasFastFP16Support(const DeviceProperties& props) {
116116
#elif TENSORFLOW_USE_ROCM
117117
absl::flat_hash_set<std::string> FP16SupportedDevices = {
118118
{"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"},
119-
{"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"},
119+
{"gfx942"}, {"gfx950"},
120+
{"gfx1010"}, {"gfx1012"}, {"gfx1030"},
120121
{"gfx1100"}, {"gfx1101"}, {"gfx1102"},
121122
{"gfx1200"}, {"gfx1201"}
122123
};

tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) {
7373
compute_capability_it->second == "gfx940" ||
7474
compute_capability_it->second == "gfx941" ||
7575
compute_capability_it->second == "gfx942" ||
76+
compute_capability_it->second == "gfx950" ||
7677
compute_capability_it->second == "gfx1101" ||
7778
compute_capability_it->second == "gfx1102" ||
7879
compute_capability_it->second == "gfx1200" ||

tensorflow/core/util/gpu_device_functions.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) {
743743
}
744744
#endif
745745

746-
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
746+
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
747747

748748
#define ADDRSP1 __attribute__((address_space(1)))
749749
__device__ float
@@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) {
963963
}
964964
#endif
965965

966-
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
966+
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
967967
// Low level instructions don't return. For now, assume that return value
968968
// is always unused.
969969
__device__ float GpuAtomicAdd(float* dst, float val) {
@@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) {
978978
return GpuAtomicAdd(ptr, value);
979979
}
980980

981-
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
981+
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
982982
__device__ float GpuAtomicAddShared(float* dst, float val) {
983983
atomicAdd(dst, val);
984984
return val;

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh
88
COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt
99
RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt
1010

11-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
11+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
1212
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
1313

1414
# Install ROCM

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh
1717
COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt
1818
RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt
1919

20-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
20+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
2121
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
2222

2323
# Install ROCM

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
FROM ubuntu:20.04
33
################################################################################
44

5-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
5+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
66
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
77

88
# Install build dependencies

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
FROM ubuntu:22.04
33
################################################################################
44

5-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
5+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
66
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
77

88
# Install build dependencies

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
FROM ubuntu:24.04
33
################################################################################
44

5-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
5+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
66
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
77

88
# Install build dependencies

tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ then
145145
echo "build:rocm_base --copt=-fclang-abi-compat=17" >> /etc/bazel.bazelrc
146146
fi
147147

148-
GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"}
148+
GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100"}
149149

150150
echo $ROCM_VERSION
151151
echo $ROCM_REPO

third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,8 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token,
340340
return "+sramecc";
341341
} else if (token == "sramecc-") {
342342
if (gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" ||
343-
gfx == "gfx942" || gfx == "gfx1101" || gfx == "gfx1102" ||
343+
gfx == "gfx942" || gfx == "gfx950" ||
344+
gfx == "gfx1101" || gfx == "gfx1102" ||
344345
gfx == "gfx1200" || gfx == "gfx1201")
345346
return "";
346347
return "-sramecc";

third_party/xla/xla/service/gpu/transforms/conv_rewriter_test.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,13 @@ TEST_F(ConvRewriterTest, TestInvalidTypes) {
791791
::testing::HasSubstr(
792792
"FP8 convolutions are only supported on CUDA GPUs")));
793793

794+
s = ConvRewriter(se::RocmComputeCapability{"gfx950"}).Run(m.get()).status();
795+
EXPECT_THAT(s, tsl::testing::StatusIs(
796+
absl::StatusCode::kUnimplemented,
797+
::testing::HasSubstr(
798+
"FP8 convolutions are only supported on CUDA GPUs")));
799+
800+
794801
// Test unsupported FP8 type
795802
module_with_type = absl::StrReplaceAll(module_str, {{"TYPE", "f8e4m3fnuz"}});
796803
TF_ASSERT_OK_AND_ASSIGN(m, ParseAndReturnVerifiedModule(module_with_type));

third_party/xla/xla/stream_executor/device_description.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,13 @@ class RocmComputeCapability {
7878

7979
bool gfx9_mi100_or_later() const {
8080
static constexpr absl::string_view kList[] = {"gfx908", "gfx90a", "gfx940",
81-
"gfx941", "gfx942"};
81+
"gfx941", "gfx942", "gfx950"};
8282
return absl::c_count(kList, gfx_version()) != 0;
8383
}
8484

8585
bool gfx9_mi200_or_later() const {
8686
static constexpr absl::string_view kList[] = {"gfx90a", "gfx940", "gfx941",
87-
"gfx942"};
87+
"gfx942", "gfx950"};
8888
return absl::c_count(kList, gfx_version()) != 0;
8989
}
9090

@@ -157,6 +157,7 @@ class RocmComputeCapability {
157157
"gfx908", // MI100
158158
"gfx90a", // MI200
159159
"gfx940", "gfx941", "gfx942", // MI300
160+
"gfx950",
160161
"gfx1030", // RX68xx / RX69xx
161162
"gfx1100", "gfx1101", "gfx1102", // RX7900
162163
"gfx1200", "gfx1201", // RX8900

0 commit comments

Comments
 (0)