Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DeviceASAN] Make ShadowMemory one instance per type #2585

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 27 additions & 18 deletions source/loader/layers/sanitizer/asan/asan_ddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,34 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices,
const ur_device_handle_t *phDevices) {
std::shared_ptr<ContextInfo> CI;
UR_CALL(getAsanInterceptor()->insertContext(Context, CI));
for (uint32_t i = 0; i < numDevices; ++i) {
auto hDevice = phDevices[i];
std::shared_ptr<DeviceInfo> DI;
UR_CALL(getAsanInterceptor()->insertDevice(hDevice, DI));
DI->Type = GetDeviceType(Context, hDevice);
if (DI->Type == DeviceType::UNKNOWN) {
getContext()->logger.error("Unsupport device");
return UR_RESULT_ERROR_INVALID_DEVICE;
}
getContext()->logger.info(
"DeviceInfo {} (Type={}, IsSupportSharedSystemUSM={})",
(void *)DI->Handle, ToString(DI->Type), DI->IsSupportSharedSystemUSM);
getContext()->logger.info("Add {} into context {}", (void *)DI->Handle,
(void *)Context);
if (!DI->Shadow) {
UR_CALL(DI->allocShadowMemory(Context));

if (numDevices > 0) {
auto DeviceType = GetDeviceType(Context, phDevices[0]);
auto ShadowMemory =
getAsanInterceptor()->getOrCreateShadowMemory(phDevices[0], DeviceType);

for (uint32_t i = 0; i < numDevices; ++i) {
auto hDevice = phDevices[i];
std::shared_ptr<DeviceInfo> DI;
UR_CALL(getAsanInterceptor()->insertDevice(hDevice, DI));
DI->Type = GetDeviceType(Context, hDevice);
if (DI->Type == DeviceType::UNKNOWN) {
getContext()->logger.error("Unsupport device");
return UR_RESULT_ERROR_INVALID_DEVICE;
}
if (DI->Type != DeviceType) {
getContext()->logger.error("Different device type in the same context");
return UR_RESULT_ERROR_INVALID_DEVICE;
}
getContext()->logger.info(
"DeviceInfo {} (Type={}, IsSupportSharedSystemUSM={})",
(void *)DI->Handle, ToString(DI->Type), DI->IsSupportSharedSystemUSM);
getContext()->logger.info("Add {} into context {}", (void *)DI->Handle,
(void *)Context);
DI->Shadow = ShadowMemory;
CI->DeviceList.emplace_back(hDevice);
CI->AllocInfosMap[hDevice];
}
CI->DeviceList.emplace_back(hDevice);
CI->AllocInfosMap[hDevice];
}
return UR_RESULT_SUCCESS;
}
Expand Down
36 changes: 26 additions & 10 deletions source/loader/layers/sanitizer/asan/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ AsanInterceptor::~AsanInterceptor() {
// We must release these objects before releasing adapters, since
// they may use the adapter in their destructor
for (const auto &[_, DeviceInfo] : m_DeviceMap) {
[[maybe_unused]] auto URes = DeviceInfo->Shadow->Destory();
assert(URes == UR_RESULT_SUCCESS);
DeviceInfo->Shadow = nullptr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not clear "m_DeviceMap" directly?

Copy link
Contributor Author

@yingcong-wu yingcong-wu Jan 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that would also work. However, we are aiming for a minimal change here, so I would respectfully decline this suggestion. We will have a follow-up PR that does a more thorough refactoring.

}

m_Quarantine = nullptr;
Expand All @@ -48,6 +47,11 @@ AsanInterceptor::~AsanInterceptor() {
// detection depends on it.
m_AllocationMap.clear();

for (auto &[_, ShadowMemory] : m_ShadowMap) {
ShadowMemory->Destory();
getContext()->urDdiTable.Context.pfnRelease(ShadowMemory->Context);
}

for (auto Adapter : m_Adapters) {
getContext()->urDdiTable.Global.pfnAdapterRelease(Adapter);
}
Expand Down Expand Up @@ -303,14 +307,26 @@ ur_result_t AsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
return Result;
}

ur_result_t DeviceInfo::allocShadowMemory(ur_context_handle_t Context) {
Shadow = GetShadowMemory(Context, Handle, Type);
assert(Shadow && "Failed to get shadow memory");
UR_CALL(Shadow->Setup());
getContext()->logger.info("ShadowMemory(Global): {} - {}",
(void *)Shadow->ShadowBegin,
(void *)Shadow->ShadowEnd);
return UR_RESULT_SUCCESS;
/// Returns the ShadowMemory instance cached for \p Type, creating it on the
/// first request.
///
/// On a cache miss an internal context containing only \p Device is created,
/// registered through insertContext(), and handed to GetShadowMemory(); the
/// resulting object is set up and stored in m_ShadowMap under \p Type.
///
/// \param Device Device used to create the internal context on a cache miss.
/// \param Type   Key identifying which cached ShadowMemory to return.
/// \return The cached (or newly created) ShadowMemory, or nullptr when the
///         internal context could not be created.
std::shared_ptr<ShadowMemory>
AsanInterceptor::getOrCreateShadowMemory(ur_device_handle_t Device,
                                         DeviceType Type) {
  // Acquire the lock before touching the map at all. Looking the key up
  // without the lock can run concurrently with another thread's insertion
  // into the std::unordered_map, which is a data race.
  std::scoped_lock<ur_shared_mutex> Guard(m_ShadowMapMutex);

  if (auto It = m_ShadowMap.find(Type); It != m_ShadowMap.end()) {
    return It->second;
  }

  ur_context_handle_t InternalContext;
  auto Res = getContext()->urDdiTable.Context.pfnCreate(1, &Device, nullptr,
                                                        &InternalContext);
  if (Res != UR_RESULT_SUCCESS) {
    getContext()->logger.error("Failed to create shadow context");
    return nullptr;
  }

  // Failures below are reported but, as before, do not change what is
  // cached or returned.
  std::shared_ptr<ContextInfo> CI;
  if (insertContext(InternalContext, CI) != UR_RESULT_SUCCESS) {
    getContext()->logger.error("Failed to register shadow context");
  }

  auto Shadow = GetShadowMemory(InternalContext, Device, Type);
  if (Shadow->Setup() != UR_RESULT_SUCCESS) {
    getContext()->logger.error("Failed to setup shadow memory");
  }

  m_ShadowMap[Type] = Shadow;
  return Shadow;
}

/// Each 8 bytes of application memory are mapped into one byte of shadow memory
Expand Down
11 changes: 6 additions & 5 deletions source/loader/layers/sanitizer/asan/asan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ struct DeviceInfo {
// Device handles are special and alive in the whole process lifetime,
// so we needn't retain&release here.
explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {}

ur_result_t allocShadowMemory(ur_context_handle_t Context);
};

struct QueueInfo {
Expand Down Expand Up @@ -353,6 +351,9 @@ class AsanInterceptor {

bool isNormalExit() { return m_NormalExit; }

std::shared_ptr<ShadowMemory>
getOrCreateShadowMemory(ur_device_handle_t Device, DeviceType Type);

private:
ur_result_t updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo,
Expand All @@ -368,9 +369,6 @@ class AsanInterceptor {
ur_queue_handle_t Queue, ur_kernel_handle_t Kernel,
LaunchInfo &LaunchInfo);

ur_result_t allocShadowMemory(ur_context_handle_t Context,
std::shared_ptr<DeviceInfo> &DeviceInfo);

ur_result_t registerDeviceGlobals(ur_program_handle_t Program);
ur_result_t registerSpirKernels(ur_program_handle_t Program);

Expand Down Expand Up @@ -406,6 +404,9 @@ class AsanInterceptor {
ur_shared_mutex m_AdaptersMutex;

bool m_NormalExit = true;

std::unordered_map<DeviceType, std::shared_ptr<ShadowMemory>> m_ShadowMap;
ur_shared_mutex m_ShadowMapMutex;
};

} // namespace asan
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,10 @@ DeviceType GetDeviceType(ur_context_handle_t Context,
// by the value of device USM pointer (see "USM Allocation Range" in
// asan_shadow.cpp)
auto Type = DeviceType::UNKNOWN;
if (Ptr >> 48 == 0xff00U) {

// L0 changed its VA layout.
// TODO: update our shadow memory layout/algorithms accordingly.
if (((Ptr >> 52) & 0xff0U) == 0xff0U) {
Type = DeviceType::GPU_PVC;
} else {
Type = DeviceType::GPU_DG2;
Expand Down
Loading