Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: System Allocator support for Level Zero #782

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 30 additions & 16 deletions level_zero/core/source/cmdlist/cmdlist_hw.inl
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include <algorithm>
#include <unordered_map>


namespace L0 {

inline ze_result_t parseErrorCode(NEO::CommandContainer::ErrorCode returnValue) {
Expand Down Expand Up @@ -514,18 +515,28 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd

appendEventForProfiling(event, nullptr, true, false, false, false);
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(static_cast<const void *>(pNumLaunchArguments));
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
commandContainer.addToResidencyContainer(alloc);

if (allocData) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should only be allowed if system shared USM is enabled

auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
commandContainer.addToResidencyContainer(alloc);

for (uint32_t i = 0; i < numKernels; i++) {
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly(false));
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
pLaunchArgumentsBuffer[i],
nullptr, launchParams);
if (ret) {
return ret;
}
}
} else {
for (uint32_t i = 0; i < numKernels; i++) {
NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly(false));

ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
pLaunchArgumentsBuffer[i],
nullptr, launchParams);
if (ret) {
return ret;
}
}
}
addToMappedEventList(event);
appendSignalEventPostWalker(event, nullptr, nullptr, false, false, false);
Expand Down Expand Up @@ -2012,18 +2023,21 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,

bool hostPointerNeedsFlush = false;

NEO::SvmAllocationData *allocData = nullptr;
bool dstAllocFound = device->getDriverHandle()->findAllocationDataForRange(ptr, size, allocData);
if (dstAllocFound) {
if (allocData->memoryType == InternalMemoryType::hostUnifiedMemory ||
allocData->memoryType == InternalMemoryType::sharedUnifiedMemory) {
hostPointerNeedsFlush = true;
}
} else {
if (device->getDriverHandle()->getHostPointerBaseAddress(ptr, nullptr) != ZE_RESULT_SUCCESS) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
if (!deviceImp->isSystemAllocEnabled()) {
NEO::SvmAllocationData *allocData = nullptr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this code skipped for Shared System USM?

bool dstAllocFound = device->getDriverHandle()->findAllocationDataForRange(ptr, size, allocData);
if (dstAllocFound) {
if (allocData->memoryType == InternalMemoryType::hostUnifiedMemory ||
allocData->memoryType == InternalMemoryType::sharedUnifiedMemory) {
hostPointerNeedsFlush = true;
}
} else {
hostPointerNeedsFlush = true;
if (device->getDriverHandle()->getHostPointerBaseAddress(ptr, nullptr) != ZE_RESULT_SUCCESS) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
} else {
hostPointerNeedsFlush = true;
}
}
}

Expand Down
10 changes: 10 additions & 0 deletions level_zero/core/source/device/device_imp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
#include "level_zero/tools/source/sysman/sysman.h"

#include "encode_surface_state_args.h"
#include "shared/source/os_interface/linux/drm_neo.h"

#include <algorithm>
#include <array>
Expand Down Expand Up @@ -2003,4 +2004,13 @@ uint32_t DeviceImp::getEventMaxKernelCount() const {
return l0GfxCoreHelper.getEventMaxKernelCount(hardwareInfo);
}

// Reports whether the system allocator is enabled for this device.
//
// Returns the value of NEO::Drm::isSystemAllocEnabled() when the root device
// environment has an OS interface whose driver model is of type
// NEO::DriverModelType::drm. Returns false in every other case, including
// when the OS interface or the driver model is missing, so the query never
// dereferences a null pointer.
bool DeviceImp::isSystemAllocEnabled() const {
    auto &osInterface = this->getNEODevice()->getRootDeviceEnvironment().osInterface;
    if (!osInterface) {
        // No OS interface attached to this root device environment.
        return false;
    }

    auto *driverModel = osInterface->getDriverModel();
    if (driverModel == nullptr) {
        // OS interface exists but no driver model has been set on it.
        return false;
    }

    if (driverModel->getDriverModelType() != NEO::DriverModelType::drm) {
        // Only the DRM driver model exposes the system allocator query.
        return false;
    }

    return driverModel->as<NEO::Drm>()->isSystemAllocEnabled();
}

} // namespace L0
1 change: 1 addition & 0 deletions level_zero/core/source/device/device_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ struct DeviceImp : public Device, NEO::NonCopyableOrMovableClass {
ze_result_t setDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties);
uint32_t getEventMaxPacketCount() const override;
uint32_t getEventMaxKernelCount() const override;
bool isSystemAllocEnabled() const;
uint32_t queryDeviceNodeMask();
NEO::EngineGroupType getInternalEngineGroupType();
uint32_t getCopyEngineOrdinal() const;
Expand Down
1 change: 1 addition & 0 deletions shared/source/debug_settings/debug_variables_base.inl
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceExtendedUSMBufferSize, -1, "-1: default, 0:
DECLARE_DEBUG_VARIABLE(int32_t, ForceExtendedKernelIsaSize, -1, "-1: default, 0: disabled, >=1: Forces extended kernel isa size by specified pageSize number")
DECLARE_DEBUG_VARIABLE(int32_t, ForceSimdMessageSizeInWalker, -1, "-1: default, >=0 Program given value in Walker command for SIMD size")
DECLARE_DEBUG_VARIABLE(int32_t, EnableRecoverablePageFaults, -1, "-1: default - ignore, 0: disable, 1: enable recoverable page faults on all VMs (on faultable hardware)")
DECLARE_DEBUG_VARIABLE(int32_t, EnableSystemAllocator, -1, "-1: default - ignore, 0: disable, 1: enable use of system-allocated memory for GPU access")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please make sure that System Shared USM terminology is used

DECLARE_DEBUG_VARIABLE(int32_t, EnableImplicitMigrationOnFaultableHardware, -1, "-1: default - ignore, 0: disable, 1: enable implicit migration on faultable hardware (for all allocations)")
DECLARE_DEBUG_VARIABLE(int32_t, UseDrmVirtualEnginesForCcs, -1, "-1: default, 0: disable, 1: enable, Combine all CCS nodes to single VE (per context)")
DECLARE_DEBUG_VARIABLE(int32_t, UseDrmVirtualEnginesForBcs, -1, "-1: default, 0: disable, 1: enable, Combine all BCS nodes to single VE (per context)")
Expand Down
28 changes: 28 additions & 0 deletions shared/source/os_interface/linux/drm_neo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@
#include <map>
#include <sstream>

// Fallback definition: if no previously included header defines
// DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR, define it here as bit 4.
// NOTE(review): presumably mirrors the xe uAPI value - confirm against the kernel header in use.
#ifndef DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR
#define DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR (1 << 4)
#endif

namespace NEO {

Drm::Drm(std::unique_ptr<HwDeviceIdDrm> &&hwDeviceIdIn, RootDeviceEnvironment &rootDeviceEnvironment)
Expand Down Expand Up @@ -1140,6 +1144,16 @@ bool Drm::hasPageFaultSupport() const {
return pageFaultSupported;
}

// Decides whether the system allocator is enabled and records the decision
// through setSystemAllocEnable(). The result stays false unless the DRM
// version string equals "xe" and the EnableSystemAllocator debug flag is set
// to something other than -1; in that case the flag's value converted to
// bool (non-zero means enabled) is used.
void Drm::checkSystemAllocEnabled() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please change the name to clearly indicate that this is for Shared System USM

auto drmVersion = Drm::getDrmVersion(getFileDescriptor());
bool systemAllocSupported = false;
// For now this can only be turned on through the debug variable; a value of -1 leaves it disabled.
if (("xe" == drmVersion) && (debugManager.flags.EnableSystemAllocator.get() != -1)) {
systemAllocSupported = !!debugManager.flags.EnableSystemAllocator.get();
}
setSystemAllocEnable(systemAllocSupported);
}

bool Drm::hasKmdMigrationSupport() const {
const auto &productHelper = this->getRootDeviceEnvironment().getHelper<ProductHelper>();
auto kmdMigrationSupported = hasPageFaultSupport() && productHelper.isKmdMigrationSupported();
Expand Down Expand Up @@ -1570,6 +1584,20 @@ int Drm::createDrmVirtualMemory(uint32_t &drmVmId) {

if (ret == 0) {
drmVmId = ctl.vmId;
checkSystemAllocEnabled();
if (isSystemAllocEnabled()) {
VmBindParams vmBind{};
vmBind.vmId = static_cast<uint32_t>(ctl.vmId);
vmBind.flags = DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR;
vmBind.handle = 0;
vmBind.length = (0x1ull << 48);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be dependent on the CPU VA range.

vmBind.offset = 0;
vmBind.start = 0;
vmBind.userptr = 0;
setVmBindSystemAlloc(true);
ret = ioctlHelper->vmBind(vmBind);
setVmBindSystemAlloc(false);
}
if (ctl.vmId == 0) {
// 0 is reserved for invalid/unassigned ppgtt
return -1;
Expand Down
9 changes: 9 additions & 0 deletions shared/source/os_interface/linux/drm_neo.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ class Drm : public DriverModel {
void setDirectSubmissionActive(bool value) { this->directSubmissionActive = value; }
bool isDirectSubmissionActive() const { return this->directSubmissionActive; }

// Stores the vmBindSystemAlloc flag that isVmBindSystemAlloc() reads back.
void setVmBindSystemAlloc(bool value) { this->vmBindSystemAlloc = value; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove; this needs to be passed via arguments and not by modifying global drm state.

// Returns the current value of the vmBindSystemAlloc flag.
bool isVmBindSystemAlloc() const { return this->vmBindSystemAlloc; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please remove and pass via arguments

// Declared here, defined out of line.
// NOTE(review): presumably evaluates system-allocator support and stores it via setSystemAllocEnable() - confirm against the definition.
void checkSystemAllocEnabled();
// Stores the system-allocator enable flag.
void setSystemAllocEnable(bool value) { this->systemAllocEnable = value; }
// Returns the stored system-allocator enable flag.
bool isSystemAllocEnabled() const { return this->systemAllocEnable; }

MOCKABLE_VIRTUAL bool isSetPairAvailable();
MOCKABLE_VIRTUAL bool getSetPairAvailable() { return setPairAvailable; }
MOCKABLE_VIRTUAL bool isChunkingAvailable();
Expand All @@ -166,6 +172,7 @@ class Drm : public DriverModel {

MOCKABLE_VIRTUAL void queryPageFaultSupport();
bool hasPageFaultSupport() const;
bool hasSystemAllocSupport() const;
bool hasKmdMigrationSupport() const;
bool checkToDisableScratchPage() { return disableScratch; }
unsigned int getGpuFaultCheckThreshold() const { return gpuFaultCheckThreshold; }
Expand Down Expand Up @@ -348,6 +355,8 @@ class Drm : public DriverModel {
bool requirePerContextVM = false;
bool bindAvailable = false;
bool directSubmissionActive = false;
bool vmBindSystemAlloc = false;
bool systemAllocEnable = false;
bool setPairAvailable = false;
bool chunkingAvailable = false;
uint32_t chunkingMode = 0;
Expand Down
69 changes: 55 additions & 14 deletions shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
#define STRINGIFY_ME(X) return #X
#define RETURN_ME(X) return X

// Fallback definition: if no previously included header defines
// DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR, define it here as bit 4.
// NOTE(review): presumably mirrors the xe uAPI value - confirm against the kernel header in use.
#ifndef DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR
#define DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR (1 << 4)
#endif
namespace NEO {

const char *IoctlHelperXe::xeGetClassName(int className) {
Expand Down Expand Up @@ -1317,6 +1320,37 @@ int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool isBind) {
const char *operation = isBind ? "bind" : "unbind";
int index = invalidIndex;

if (this->drm.isVmBindSystemAlloc()) {
drm_xe_vm_bind bind = {};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this path taken for all vm_binds?
this overwrites other ioctl calls, please unify the logic so we have a single ioctl calling point

bind.vm_id = vmBindParams.vmId;
bind.num_syncs = 0;
bind.num_binds = 1;
bind.bind.range = vmBindParams.length;
bind.bind.addr = 0; //gmmHelper->decanonize(vmBindParams.start);
bind.bind.obj_offset = vmBindParams.offset;
bind.bind.pat_index = static_cast<uint16_t>(vmBindParams.patIndex);
bind.bind.extensions = vmBindParams.extensions;
bind.bind.flags = static_cast<uint32_t>(vmBindParams.flags);
bind.bind.op = DRM_XE_VM_BIND_OP_MAP;
bind.bind.obj = 0;
ret = IoctlHelper::ioctl(DrmIoctl::gemVmBind, &bind);
xeLog(" vm=%d obj=0x%x off=0x%llx range=0x%llx addr=0x%llx operation=%d(%s) flags=%d(%s) nsy=%d pat=%hu ret=%d\n",
bind.vm_id,
bind.bind.obj,
bind.bind.obj_offset,
bind.bind.range,
bind.bind.addr,
bind.bind.op,
xeGetBindOperationName(bind.bind.op),
bind.bind.flags,
" ", //xeGetBindFlagNames(bind.bind.flags).c_str(),
bind.num_syncs,
bind.bind.pat_index,
ret);
return ret;
}


if (isBind) {
for (auto i = 0u; i < bindInfo.size(); i++) {
if (vmBindParams.handle && vmBindParams.handle == bindInfo[i].handle) {
Expand Down Expand Up @@ -1373,11 +1407,16 @@ int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool isBind) {
bind.bind.obj_offset = bindInfo[index].userptr;
}
} else {
bind.bind.op = DRM_XE_VM_BIND_OP_UNMAP;
bind.bind.obj = 0;
if (bindInfo[index].userptr) {
bind.bind.obj_offset = bindInfo[index].userptr;
if (this->drm.isSystemAllocEnabled()) {
bind.bind.op = DRM_XE_VM_BIND_OP_MAP;
bind.bind.flags |= DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR;
} else {
bind.bind.op = DRM_XE_VM_BIND_OP_UNMAP;
}
bind.bind.obj = 0;
//if (bindInfo[index].userptr) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please remove commented code

// bind.bind.obj_offset = bindInfo[index].userptr;
//}
}

bindInfo[index].addr = bind.bind.addr;
Expand All @@ -1403,17 +1442,19 @@ int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool isBind) {
return ret;
}

constexpr auto oneSecTimeout = 1000000000ll;
constexpr auto infiniteTimeout = -1;
bool debuggingEnabled = drm.getRootDeviceEnvironment().executionEnvironment.isDebuggingEnabled();
uint64_t timeout = debuggingEnabled ? infiniteTimeout : oneSecTimeout;
if (debugManager.flags.VmBindWaitUserFenceTimeout.get() != -1) {
timeout = debugManager.flags.VmBindWaitUserFenceTimeout.get();
if (!this->drm.isVmBindSystemAlloc()) {
constexpr auto oneSecTimeout = 1000000000ll;
constexpr auto infiniteTimeout = -1;
bool debuggingEnabled = drm.getRootDeviceEnvironment().executionEnvironment.isDebuggingEnabled();
uint64_t timeout = debuggingEnabled ? infiniteTimeout : oneSecTimeout;
if (debugManager.flags.VmBindWaitUserFenceTimeout.get() != -1) {
timeout = debugManager.flags.VmBindWaitUserFenceTimeout.get();
}
return xeWaitUserFence(bind.exec_queue_id, DRM_XE_UFENCE_WAIT_OP_EQ,
sync[0].addr,
sync[0].timeline_value, timeout,
false, NEO::InterruptId::notUsed, nullptr);
}
return xeWaitUserFence(bind.exec_queue_id, DRM_XE_UFENCE_WAIT_OP_EQ,
sync[0].addr,
sync[0].timeline_value, timeout,
false, NEO::InterruptId::notUsed, nullptr);
}

xeLog("error: -> IoctlHelperXe::%s %s index=%d vmid=0x%x h=0x%x s=0x%llx o=0x%llx l=0x%llx f=0x%llx pat=%hu r=%d\n",
Expand Down