Skip to content

Commit ce71cfb

Browse files
committed
add a tester for bf16 to float conversions
1 parent b1a1675 commit ce71cfb

File tree

3 files changed

+167
-0
lines changed

3 files changed

+167
-0
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright (c) 2019-2024 Ben Ashbaugh
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
add_opencl_sample(
6+
TEST
7+
NUMBER 17
8+
TARGET bf16conversions
9+
VERSION 120
10+
SOURCES main.cpp)

samples/17_bf16conversions/main.cpp

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
/*
2+
// Copyright (c) 2019-2024 Ben Ashbaugh
3+
//
4+
// SPDX-License-Identifier: MIT
5+
*/
6+
7+
#include <popl/popl.hpp>
8+
9+
#include <CL/opencl.hpp>
10+
11+
#include <cmath>
12+
#include <cstdint>
13+
14+
#include "util.hpp"
15+
16+
#ifndef CL_INTEL_BFLOAT16_CONVERSIONS_NAME
17+
#define CL_INTEL_BFLOAT16_CONVERSIONS_NAME \
18+
"cl_intel_bfloat16_conversions"
19+
#endif
20+
21+
static const char kernelString[] = R"CLC(
22+
kernel void bf16_convert( global float* dst )
23+
{
24+
uint id = get_global_id(0);
25+
dst[id] = intel_convert_as_bfloat16_float(id);;
26+
}
27+
)CLC";
28+
29+
static float bf16_to_float(const uint16_t a) {
30+
union {
31+
uint32_t intStorage;
32+
float floatValue;
33+
};
34+
intStorage = a << 16;
35+
return floatValue;
36+
}
37+
38+
int main(
39+
int argc,
40+
char** argv )
41+
{
42+
constexpr size_t gws = 65536;
43+
44+
int platformIndex = 0;
45+
int deviceIndex = 0;
46+
47+
{
48+
popl::OptionParser op("Supported Options");
49+
op.add<popl::Value<int>>("p", "platform", "Platform Index", platformIndex, &platformIndex);
50+
op.add<popl::Value<int>>("d", "device", "Device Index", deviceIndex, &deviceIndex);
51+
52+
bool printUsage = false;
53+
try {
54+
op.parse(argc, argv);
55+
} catch (std::exception& e) {
56+
fprintf(stderr, "Error: %s\n\n", e.what());
57+
printUsage = true;
58+
}
59+
60+
if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) {
61+
fprintf(stderr,
62+
"Usage: bf16conversions [options]\n"
63+
"%s", op.help().c_str());
64+
return -1;
65+
}
66+
}
67+
68+
std::vector<cl::Platform> platforms;
69+
cl::Platform::get(&platforms);
70+
71+
printf("Running on platform: %s\n",
72+
platforms[platformIndex].getInfo<CL_PLATFORM_NAME>().c_str() );
73+
74+
std::vector<cl::Device> devices;
75+
platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices);
76+
77+
printf("Running on device: %s\n",
78+
devices[deviceIndex].getInfo<CL_DEVICE_NAME>().c_str() );
79+
80+
bool has_cl_intel_bfloat16_conversions =
81+
checkDeviceForExtension(devices[deviceIndex], CL_INTEL_BFLOAT16_CONVERSIONS_NAME);
82+
if (has_cl_intel_bfloat16_conversions) {
83+
printf("Device supports " CL_INTEL_BFLOAT16_CONVERSIONS_NAME ".\n");
84+
} else {
85+
printf("Device does not support " CL_INTEL_BFLOAT16_CONVERSIONS_NAME ", exiting.\n");
86+
return -1;
87+
}
88+
89+
cl::Context context{devices[deviceIndex]};
90+
cl::CommandQueue commandQueue{context, devices[deviceIndex]};
91+
92+
cl::Program program{ context, kernelString };
93+
program.build();
94+
cl::Kernel kernel = cl::Kernel{ program, "bf16_convert" };
95+
96+
cl::Buffer deviceMemDst = cl::Buffer{
97+
context,
98+
CL_MEM_ALLOC_HOST_PTR,
99+
gws * sizeof(cl_float) };
100+
101+
kernel.setArg(0, deviceMemDst);
102+
commandQueue.enqueueNDRangeKernel(
103+
kernel,
104+
cl::NullRange,
105+
cl::NDRange{gws} );
106+
107+
// verification
108+
{
109+
auto pDst = (const float*)commandQueue.enqueueMapBuffer(
110+
deviceMemDst,
111+
CL_TRUE,
112+
CL_MAP_READ,
113+
0,
114+
gws * sizeof(cl_float) );
115+
116+
unsigned int mismatches = 0;
117+
118+
for( size_t i = 0; i < gws; i++ )
119+
{
120+
auto result = pDst[i];
121+
auto check = bf16_to_float(static_cast<uint16_t>(i));
122+
if( (std::isnan(result) && !std::isnan(check)) ||
123+
(!std::isnan(result) && std::isnan(check)) ||
124+
(!std::isnan(result) && !std::isnan(check) && result != check) )
125+
{
126+
if( mismatches < 16 )
127+
{
128+
fprintf(stderr, "MisMatch at index %zu: got %f (%08X), want %f (%08X)\n",
129+
i,
130+
result,
131+
*(unsigned int*)&result,
132+
check,
133+
*(unsigned int*)&check );
134+
}
135+
mismatches++;
136+
}
137+
}
138+
139+
if( mismatches )
140+
{
141+
fprintf(stderr, "Error: Found %d mismatches / %d values!!!\n",
142+
mismatches,
143+
(unsigned int)gws );
144+
}
145+
else
146+
{
147+
printf("Success.\n");
148+
}
149+
150+
commandQueue.enqueueUnmapMemObject(
151+
deviceMemDst,
152+
(void*)pDst );
153+
}
154+
155+
return 0;
156+
}

samples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ add_subdirectory( 06_ndrangekernelfromfile )
7676

7777
add_subdirectory( 10_queueexperiments )
7878
add_subdirectory( 16_floatatomics )
79+
add_subdirectory( 17_bf16conversions )
7980

8081
set(BUILD_EXTENSION_SAMPLES TRUE)
8182
if(NOT TARGET OpenCLExt)

0 commit comments

Comments
 (0)