Skip to content

Commit 71ca51f

Browse files
jinge90 and mdtoguchi
authored
[SYCL][Driver] Support bfloat16 devicelib selection when multiple AOT targets specified (#16494)
Users can specify multiple AOT targets when building a SYCL program in the following ways: 1). via -fsycl-targets=intel_gpu_pvc,intel_gpu_acm_g10,.... 2). via -fsycl-targets=spir64_gen ... -Xs "-device pvc,dg2...." 3). via -fsycl-targets=spir64_gen..., -Xsycl-target-backend=spir64_gen "-device pvc" We should select the native bfloat16 devicelib when all specified AOT targets support native bfloat16 conversion. Currently, pvc, dg2, and bmg devices support native bfloat16. If the user specifies a JIT target together with AOT targets that all support native bfloat16 conversion, we still select the native bfloat16 devicelib, since the bfloat16 devicelib is skipped in the linking step for the JIT target. --------- Signed-off-by: jinge90 <[email protected]> Co-authored-by: Michael Toguchi <[email protected]>
1 parent 51cfcfa commit 71ca51f

File tree

2 files changed

+173
-17
lines changed

2 files changed

+173
-17
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

+67-17
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@
1313
#include "clang/Driver/DriverDiagnostic.h"
1414
#include "clang/Driver/InputInfo.h"
1515
#include "clang/Driver/Options.h"
16+
#include "llvm/ADT/SmallSet.h"
1617
#include "llvm/Option/Option.h"
18+
#include "llvm/SYCLLowerIR/DeviceConfigFile.hpp"
1719
#include "llvm/Support/CommandLine.h"
1820
#include "llvm/Support/FileSystem.h"
1921
#include "llvm/Support/Path.h"
20-
#include "llvm/SYCLLowerIR/DeviceConfigFile.hpp"
2122
#include <algorithm>
2223
#include <sstream>
2324

@@ -305,6 +306,11 @@ bool SYCL::shouldDoPerObjectFileLinking(const Compilation &C) {
305306
// Return whether to use native bfloat16 library.
306307
static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,
307308
bool &UseNative) {
309+
310+
static llvm::SmallSet<StringRef, 8> GPUArchsWithNBF16{
311+
"intel_gpu_pvc", "intel_gpu_acm_g10", "intel_gpu_acm_g11",
312+
"intel_gpu_acm_g12", "intel_gpu_dg2_10", "intel_gpu_dg2_11",
313+
"intel_dg2_g12", "intel_gpu_bmg_g21"};
308314
const llvm::opt::ArgList &Args = C.getArgs();
309315
bool NeedLibs = false;
310316

@@ -330,32 +336,75 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,
330336
}
331337
}
332338

333-
UseNative = false;
334-
335-
// Check for intel_gpu_pvc as the target
336-
if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) {
337-
if (SYCLTarget->getValues().size() == 1) {
338-
StringRef SYCLTargetStr = SYCLTarget->getValue();
339-
if (SYCLTargetStr == "intel_gpu_pvc")
340-
UseNative = true;
341-
}
342-
}
343-
344-
auto checkBF = [](StringRef Device) {
345-
return Device.starts_with("pvc") || Device.starts_with("ats");
346-
};
339+
// We need to select fallback/native bfloat16 devicelib in AOT compilation
340+
// targeting Intel GPU devices. Users have 3 ways to apply AOT:
341+
// 1). clang++ -fsycl -fsycl-targets=spir64_gen -Xs "-device pvc,...,"
342+
// 2). clang++ -fsycl -fsycl-targets=intel_gpu_pvc,...
343+
// 3). clang++ -fsycl -fsycl-targets=spir64_gen,intel_gpu_pvc,...
344+
// -Xsycl-target-backend=spir64_gen "-device dg2"
347345

348346
std::string Params;
349347
for (const auto &Arg : TargArgs) {
350348
Params += " ";
351349
Params += Arg;
352350
}
351+
352+
auto checkBF = [](StringRef Device) {
353+
return Device.starts_with("pvc") || Device.starts_with("ats") ||
354+
Device.starts_with("dg2") || Device.starts_with("bmg");
355+
};
356+
357+
auto checkSpirvJIT = [](StringRef Target) {
358+
return Target.starts_with("spir64-") || Target.starts_with("spirv64-") ||
359+
(Target == "spir64") || (Target == "spirv64");
360+
};
361+
353362
size_t DevicesPos = Params.find("-device ");
354-
if (!UseNative && DevicesPos != std::string::npos) {
363+
// "-device xxx" is used to specify AOT target device, so user must apply
364+
// -Xs "-device xxx" or -Xsycl-target-backend=spir64_gen "-device xxx"
365+
if (DevicesPos != std::string::npos) {
355366
UseNative = true;
356367
std::istringstream Devices(Params.substr(DevicesPos + 8));
357368
for (std::string S; std::getline(Devices, S, ',');)
358369
UseNative &= checkBF(S);
370+
371+
// When "-device XXX" is applied to specify GPU type, user can still
372+
// add -fsycl-targets=intel_gpu_pvc..., native bfloat16 devicelib can
373+
// only be linked when all GPU types specified support it.
374+
// We need to filter CPU and FPGA target here and only focus on GPU
375+
// device.
376+
if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) {
377+
for (auto TargetsV : SYCLTarget->getValues()) {
378+
if (!checkSpirvJIT(StringRef(TargetsV)) &&
379+
!StringRef(TargetsV).starts_with("spir64_gen") &&
380+
!StringRef(TargetsV).starts_with("spir64_x86_64") &&
381+
!StringRef(TargetsV).starts_with("spir64_fpga") &&
382+
!GPUArchsWithNBF16.contains(StringRef(TargetsV))) {
383+
UseNative = false;
384+
break;
385+
}
386+
}
387+
}
388+
389+
return NeedLibs;
390+
391+
} else {
392+
// -fsycl-targets=intel_gpu_xxx is used to specify AOT target device.
393+
// Multiple Intel GPU devices can be specified, native bfloat16 devicelib
394+
// can be involved only when all GPU devices specified support native
395+
// bfloat16 conversion.
396+
UseNative = true;
397+
398+
if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) {
399+
for (auto TargetsV : SYCLTarget->getValues()) {
400+
if (!checkSpirvJIT(StringRef(TargetsV)) &&
401+
!GPUArchsWithNBF16.contains(StringRef(TargetsV))) {
402+
UseNative = false;
403+
break;
404+
}
405+
}
406+
}
407+
return NeedLibs;
359408
}
360409
}
361410
return NeedLibs;
@@ -511,7 +560,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
511560
}
512561

513562
if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
514-
LibraryList.push_back(Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc"));
563+
LibraryList.push_back(
564+
Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc"));
515565

516566
if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
517567
LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc"));

clang/test/Driver/sycl-device-lib-bfloat16.cpp

+106
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,95 @@
6868
// RUN: --sysroot=%S/Inputs/SYCL -### 2>&1 \
6969
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK
7070

71+
72+
// Test AOT-DG2 compilation uses native libs.
73+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g10 \
74+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
75+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE
76+
77+
// Test AOT-PVC + AOT-DG2 compilation uses native libs + native libs.
78+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,intel_gpu_acm_g10 \
79+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
80+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE
81+
82+
// Test AOT-DG1 + AOT-DG2 compilation uses fallback libs + fallback libs.
83+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,intel_gpu_acm_g10 \
84+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
85+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK
86+
87+
88+
// Test AOT-PVC + JIT compilation uses native libs + no libs
89+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64 \
90+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
91+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE
92+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64 \
93+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
94+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE
95+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown \
96+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
97+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE
98+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown \
99+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
100+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE
101+
102+
// Test AOT-DG1 + JIT compilation uses fallback libs + no libs
103+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64 \
104+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
105+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE
106+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spirv64 \
107+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
108+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE
109+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64-unknown-unknown \
110+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
111+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE
112+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spirv64-unknown-unknown \
113+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
114+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE
115+
116+
// Test AOT-PVC + JIT compilation + AOT-DG2 uses native libs + no libs + native libs
117+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64,intel_gpu_acm_g10 \
118+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
119+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE
120+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64,intel_gpu_acm_g10 \
121+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
122+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE
123+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown,intel_gpu_acm_g10 \
124+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
125+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE
126+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown,intel_gpu_acm_g10 \
127+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
128+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE
129+
130+
// Test AOT-PVC + JIT compilation + AOT-DG1 uses fallback libs + no libs + fallback libs
131+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64,intel_gpu_dg1 \
132+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
133+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK
134+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64,intel_gpu_dg1 \
135+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
136+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK
137+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown,intel_gpu_dg1 \
138+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
139+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK
140+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown,intel_gpu_dg1 \
141+
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
142+
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK
143+
144+
// Test AOT-PVC + AOT-DG1 specified via different options, uses fallback libs
145+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64_gen \
146+
// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device dg1" \
147+
// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK
148+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64_gen \
149+
// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device pvc" \
150+
// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK
151+
152+
// Test AOT-PVC + AOT-BMG specified via different options, uses native libs + native libs
153+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bmg_g21,spir64_gen \
154+
// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device pvc" \
155+
// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE
156+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64_gen \
157+
// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device bmg-g21-a0" \
158+
// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE
159+
71160
// BFLOAT16-NOT: llvm-link{{.*}} "{{.*}}libsycl-{{fallback|native}}-bfloat16.bc"
72161

73162
// BFLOAT16-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
@@ -85,3 +174,20 @@
85174

86175
// BFLOAT16-FALLBACK-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"
87176
// BFLOAT16-FALLBACK-FALLBACK: "{{.*}}libsycl-fallback-bfloat16.bc"
177+
178+
// BFLOAT16-NATIVE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
179+
// BFLOAT16-NATIVE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
180+
181+
// BFLOAT16-NATIVE-NONE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
182+
// BFLOAT16-NATIVE-NONE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc"
183+
184+
// BFLOAT16-FALLBACK-NONE: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"
185+
// BFLOAT16-FALLBACK-NONE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc"
186+
187+
// BFLOAT16-NATIVE-NONE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
188+
// BFLOAT16-NATIVE-NONE-NATIVE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc"
189+
// BFLOAT16-NATIVE-NONE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
190+
191+
// BFLOAT16-FALLBACK-NONE-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"
192+
// BFLOAT16-FALLBACK-NONE-FALLBACK-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc"
193+
// BFLOAT16-FALLBACK-NONE-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"

0 commit comments

Comments
 (0)