forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[2/4] Intel GPU Runtime Upstreaming for Device (pytorch#116833)
# Motivation
According to [[1/4] Intel GPU Runtime Upstreaming for Device](pytorch#116019), as mentioned in [[RFC] Intel GPU Runtime Upstreaming](pytorch#114842), the second PR covers the changes under `aten`.

# Design
We will compile the code for XPU separately into a library named `libtorch_xpu.so`. Currently, it primarily offers device-related APIs, including:
- `getCurrentDeviceProperties`
- `getDeviceProperties`
- `getGlobalIdxFromDevice`
- `getDeviceFromPtr`

# Additional Context
`XPUHooks` is an indispensable part of the runtime. We upstream `XPUHooks` in this PR since there is some code related to `Device` in it, and we also refine some logic and code to avoid forward declaration in `DLPack`.

Pull Request resolved: pytorch#116833
Approved by: https://github.com/EikanWang, https://github.com/jgong5, https://github.com/gujinghui, https://github.com/malfet
- Loading branch information
1 parent
61ea303
commit 79811e7
Showing
19 changed files
with
369 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#include <gtest/gtest.h> | ||
|
||
#include <aten/src/ATen/xpu/XPUContext.h> | ||
#include <aten/src/ATen/xpu/XPUDevice.h> | ||
#include <c10/xpu/XPUFunctions.h> | ||
|
||
// Asserts that X and Y compare equal via operator==, reporting through
// ASSERT_TRUE. Both arguments are parenthesized so that an argument containing
// an operator with lower precedence than == (for example `a & b` or
// `c ? x : y`) is compared as a whole instead of being re-associated.
#define ASSERT_EQ_XPU(X, Y)  \
  {                          \
    bool _isEQ = (X) == (Y); \
    ASSERT_TRUE(_isEQ);      \
  }
|
||
// Checks that the properties reported for the current device agree with the
// properties fetched explicitly for device 0.
// NOTE(review): this presumes device 0 is the current device when the test
// runs -- confirm.
TEST(XpuDeviceTest, getDeviceProperties) {
  // Nothing to verify on a machine without an XPU.
  const bool has_xpu = at::xpu::is_available();
  if (!has_xpu) {
    return;
  }

  c10::xpu::DeviceProp* current_props = at::xpu::getCurrentDeviceProperties();
  c10::xpu::DeviceProp* first_props = at::xpu::getDeviceProperties(0);

  // Compare a handful of identifying fields.
  ASSERT_EQ_XPU(current_props->name, first_props->name);
  ASSERT_EQ_XPU(current_props->platform_name, first_props->platform_name);
  ASSERT_EQ_XPU(current_props->gpu_eu_count, first_props->gpu_eu_count);
}
|
||
// Checks that getDeviceFromPtr maps a SYCL device allocation made on device 0
// back to XPU device 0, and that it throws c10::Error for a pointer to a local
// variable.
TEST(XpuDeviceTest, getDeviceFromPtr) {
  // Nothing to verify on a machine without an XPU.
  const bool has_xpu = at::xpu::is_available();
  if (!has_xpu) {
    return;
  }

  // Allocate a few bytes on device 0 and look up which device owns them.
  sycl::device& first_device = at::xpu::get_raw_device(0);
  void* device_mem =
      sycl::malloc_device(8, first_device, at::xpu::get_device_context());
  at::Device owner = at::xpu::getDeviceFromPtr(device_mem);

  // The allocation is released before any assertion runs.
  sycl::free(device_mem, at::xpu::get_device_context());

  ASSERT_EQ_XPU(owner.index(), 0);
  ASSERT_EQ_XPU(owner.type(), at::kXPU);

  // The address of a local variable must be rejected.
  int host_value = 0;
  ASSERT_THROW(at::xpu::getDeviceFromPtr(&host_value), c10::Error);
}
|
||
// Checks that getGlobalIdxFromDevice returns the position of an XPU device in
// the list produced by sycl::device::get_devices(), that memory allocated on
// the device found at that position is attributed to the same XPU device, and
// that an index one past the last device is rejected with c10::Error.
TEST(XpuDeviceTest, getGlobalIdxFromDevice) {
  // Nothing to verify on a machine without an XPU.
  if (!at::xpu::is_available()) {
    return;
  }

  // Test the first device.
  int target_device = 0;
  auto global_index = at::xpu::getGlobalIdxFromDevice(target_device);
  auto devices = sycl::device::get_devices();
  ASSERT_EQ_XPU(devices[global_index], at::xpu::get_raw_device(target_device));

  // Round trip: allocate on the globally indexed device and map the pointer
  // back to the framework-level device. The allocation is released before the
  // assertions run.
  void* ptr = sycl::malloc_device(
      8, devices[global_index], at::xpu::get_device_context());
  at::Device device = at::xpu::getDeviceFromPtr(ptr);
  sycl::free(ptr, at::xpu::get_device_context());
  ASSERT_EQ_XPU(device.index(), target_device);
  ASSERT_EQ_XPU(device.type(), at::kXPU);

  // Test the last device. With a single device this would repeat the check
  // above, so it only runs when there is more than one.
  if (at::xpu::device_count() > 1) {
    target_device = at::xpu::device_count() - 1;
    global_index = at::xpu::getGlobalIdxFromDevice(target_device);
    ASSERT_EQ_XPU(
        devices[global_index], at::xpu::get_raw_device(target_device));
  }

  // One past the last valid index is out of range for any device count, so
  // this check is not skipped on single-device machines.
  target_device = at::xpu::device_count();
  ASSERT_THROW(at::xpu::getGlobalIdxFromDevice(target_device), c10::Error);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#include <ATen/xpu/XPUContext.h>
#include <c10/util/CallOnce.h>
#include <c10/util/Exception.h>

#include <algorithm>
#include <cmath>
#include <deque>
#include <iterator>
#include <mutex>
#include <vector>
|
||
namespace at::xpu { | ||
namespace { | ||
|
||
/* | ||
* Currently, there is one device properties pool containing the information and | ||
* capability about each compute-device. | ||
* | ||
* Device properties are lazily initialized when the first time properties are | ||
* requested for a device. | ||
*/ | ||
DeviceIndex num_gpus = -1; | ||
c10::once_flag init_flag; | ||
std::deque<c10::once_flag> device_prop_flags; | ||
std::vector<DeviceProp> device_properties; | ||
|
||
std::deque<c10::once_flag> device_global_idx_flags; | ||
std::vector<int32_t> device_global_idxs; | ||
|
||
// Records the device count and sizes every per-device container to match.
// The public accessors invoke this through c10::call_once on init_flag.
void initXPUContextVectors() {
  num_gpus = c10::xpu::device_count();

  // Per-device once-flags.
  device_prop_flags.resize(num_gpus);
  device_global_idx_flags.resize(num_gpus);

  // Per-device cached values.
  device_properties.resize(num_gpus);
  device_global_idxs.resize(num_gpus);
}
|
||
// Fills the cached property slot of `device` by querying
// c10::xpu::get_device_properties. The caller is expected to have sized
// device_properties and validated `device` beforehand.
void initDeviceProperty(DeviceIndex device) {
  DeviceProp* slot = &device_properties[device];
  c10::xpu::get_device_properties(slot, device);
}
|
||
// Computes and caches the position of XPU device `device` within the list
// returned by sycl::device::get_devices().
//
// Fails through TORCH_CHECK when the device does not appear in that list. The
// caller is expected to have sized device_global_idxs and validated `device`
// beforehand.
void initDeviceGlobalIdx(DeviceIndex device) {
  const sycl::device& raw_device = c10::xpu::get_raw_device(device);
  // Enumerate the SYCL devices. Called without arguments, get_devices() is not
  // restricted to a single platform (see the SYCL 2020 device class).
  const auto devices = sycl::device::get_devices();
  // Plain equality search; no predicate object holding a copy of the device is
  // needed.
  const auto it = std::find(devices.begin(), devices.end(), raw_device);
  TORCH_CHECK(
      it != devices.end(), "Can't find the global index of XPU device.");
  device_global_idxs[device] =
      static_cast<int32_t>(std::distance(devices.begin(), it));
}
|
||
// Validates that `device` lies in [0, num_gpus); otherwise TORCH_CHECK fails
// with a message reporting both the requested index and the device count.
inline void check_device(DeviceIndex device) {
  // NOTE(review): presumably cast to int so the values are formatted as
  // numbers rather than characters -- confirm against DeviceIndex's type.
  const auto requested = static_cast<int>(device);
  const auto available = static_cast<int>(num_gpus);
  TORCH_CHECK(
      device >= 0 && device < num_gpus,
      "device is out of range, device is ",
      requested,
      ", total number of device is ",
      available,
      ".");
}
|
||
} // anonymous namespace | ||
|
||
// Returns the cached properties of the device reported by
// c10::xpu::current_device(). The pointer refers to storage owned by this
// file's property pool.
DeviceProp* getCurrentDeviceProperties() {
  return getDeviceProperties(c10::xpu::current_device());
}
|
||
// Returns the cached properties of `device`, initializing that device's slot
// on first use.
//
// A `device` of -1 selects the current device. Any other index outside
// [0, device count) fails via check_device. The returned pointer refers to
// storage owned by this file's property pool.
DeviceProp* getDeviceProperties(DeviceIndex device) {
  c10::call_once(init_flag, initXPUContextVectors);

  // Resolve the -1 sentinel before range checking.
  const DeviceIndex target =
      (device == -1) ? c10::xpu::current_device() : device;
  check_device(target);

  auto& prop_flag = device_prop_flags[target];
  c10::call_once(prop_flag, initDeviceProperty, target);

  DeviceProp* props = &device_properties[target];
  return props;
}
|
||
// Maps the framework-level index of an XPU device to that device's position
// in the list enumerated by sycl::device::get_devices(). The position is
// computed once per device and cached.
//
// `device` must lie in [0, device count); unlike getDeviceProperties, -1 is
// not resolved to the current device here and fails the range check.
int32_t getGlobalIdxFromDevice(DeviceIndex device) {
  c10::call_once(init_flag, initXPUContextVectors);
  check_device(device);

  auto& idx_flag = device_global_idx_flags[device];
  c10::call_once(idx_flag, initDeviceGlobalIdx, device);

  const int32_t global_idx = device_global_idxs[device];
  return global_idx;
}
|
||
} // namespace at::xpu |
Oops, something went wrong.