Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

profiler adapt to ascend npu #10536

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ option(USE_CLANG_TIDY "" OFF)
option(BUILD_PYTHON "" ON)
option(BUILD_CPP_API "Option to build OneFlow C++ API (beta)" OFF)
option(BUILD_RDMA "" OFF)
option(BUILD_CUDA "" ON)
option(BUILD_CUDA "" OFF)
option(BUILD_NPU "Huawei Ascend NPU" ON)
option(BUILD_TESTING "" OFF)
option(BUILD_GIT_VERSION "" ON)
option(BUILD_PROFILER "" OFF)
Expand All @@ -40,6 +41,10 @@ option(OF_FORCE_COLORED_DIAGNOSTICS "Always produce ANSI-colored diagnostics (GN

set(ONEFLOW_CURRENT_VERSION 0.8.1.dev CACHE STRING "")

# Define the WITH_NPU preprocessor symbol when the Ascend NPU backend is enabled.
# The NPU-only C++ sources are wrapped in `#if defined(WITH_NPU)`, so without this
# definition they compile to nothing.
if(BUILD_NPU)
add_definitions(-DWITH_NPU)
endif()

if(BUILD_FOR_CI)
set(ONEFLOW_CURRENT_VERSION ci)
endif()
Expand Down
79 changes: 79 additions & 0 deletions cmake/ascend_npu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# The following are set after configuration is done:
# ASCEND_INCLUDE_DIRS
# ASCEND_LIBRARIES


# Resolve the root directory of the Ascend toolkit into the cache variable
# ASCEND_TOOLKIT_HOME.
#
# The value comes from the ASCEND_TOOLKIT_HOME environment variable; when that
# variable is unset *or empty* the default install prefix is used instead.
# Because set(... CACHE ...) is used without FORCE, a value that is already in
# the cache (for example from -DASCEND_TOOLKIT_HOME=<dir>) is left untouched.
if("$ENV{ASCEND_TOOLKIT_HOME}" STREQUAL "")
  message(WARNING "ASCEND_TOOLKIT_HOME env is not found. Setting default value: /usr/local/Ascend/ascend-toolkit/latest")
  set(ASCEND_TOOLKIT_HOME "/usr/local/Ascend/ascend-toolkit/latest"
      CACHE PATH "Folder contains Ascend toolkit")
else()
  # Quote the expansion so a path containing spaces or semicolons stays one value.
  message(STATUS "ASCEND_TOOLKIT_HOME found: $ENV{ASCEND_TOOLKIT_HOME}")
  set(ASCEND_TOOLKIT_HOME "$ENV{ASCEND_TOOLKIT_HOME}"
      CACHE PATH "Folder contains Ascend toolkit")
endif()


# Locate the directory that contains the Ascend `acl` / `hccl` header folders and
# store it in the cache variable ASCEND_INCLUDE_DIRS. Configuration stops with a
# fatal error when the headers cannot be found.
#
# CPLUS_INCLUDE_PATH is a native (colon-separated on Unix) search path, so it is
# converted to a CMake list before being handed to find_path().
file(TO_CMAKE_PATH "$ENV{CPLUS_INCLUDE_PATH}" ascend_env_include_dirs)

find_path(
  ASCEND_INCLUDE_DIRS
  NAMES acl hccl
  # Search below the resolved toolkit root (environment value, cached value or
  # default) instead of the raw environment variable, which may be unset.
  PATHS "${ASCEND_TOOLKIT_HOME}/include" ${ascend_env_include_dirs}
  PATH_SUFFIXES include)

if(ASCEND_INCLUDE_DIRS)
  message(STATUS "ASCEND_INCLUDE_DIRS found: ${ASCEND_INCLUDE_DIRS}")
  # No setenv script is sourced here: a child process started at configure time
  # cannot modify the environment of CMake or of the later build.
else()
  message(
    FATAL_ERROR
      "Huawei Ascend header files are not found. Please set ASCEND_TOOLKIT_HOME to specify the search path."
  )
endif()

# Locate the AscendCL runtime library (ascendcl) and store its full path in the
# cache variable ASCEND_LD_LIBRARIES. Configuration stops with a fatal error when
# the library cannot be found.
#
# LD_LIBRARY_PATH is a native (colon-separated on Unix) search path, so it is
# converted to a CMake list before being handed to find_library().
file(TO_CMAKE_PATH "$ENV{LD_LIBRARY_PATH}" ascend_env_library_dirs)

find_library(
  ASCEND_LD_LIBRARIES
  NAMES ascendcl
  # PATH_SUFFIXES makes each entry be searched as <dir>/lib64 and <dir>, using the
  # resolved toolkit root rather than the possibly unset environment variable.
  PATHS "${ASCEND_TOOLKIT_HOME}" ${ascend_env_library_dirs}
  PATH_SUFFIXES lib64)

if(ASCEND_LD_LIBRARIES)
  message(STATUS "ASCEND_LD_LIBRARIES found: ${ASCEND_LD_LIBRARIES}")
else()
  message(
    FATAL_ERROR
      "ASCEND_LD_LIBRARIES Ascend lib(ascendcl) is not found. Please set ASCEND_TOOLKIT_HOME to specify the search path."
  )
endif()

# Locate the ACL operator-compiler library (acl_op_compiler) and store its full
# path in the cache variable ASCEND_OP_COMPILER_LD_LIBRARIE. Configuration stops
# with a fatal error when the library cannot be found.
#
# LD_LIBRARY_PATH is a native (colon-separated on Unix) search path, so it is
# converted to a CMake list before being handed to find_library().
file(TO_CMAKE_PATH "$ENV{LD_LIBRARY_PATH}" ascend_env_library_dirs)

find_library(
  ASCEND_OP_COMPILER_LD_LIBRARIE
  NAMES acl_op_compiler
  # PATH_SUFFIXES makes each entry be searched as <dir>/lib64 and <dir>, using the
  # resolved toolkit root rather than the possibly unset environment variable.
  PATHS "${ASCEND_TOOLKIT_HOME}" ${ascend_env_library_dirs}
  PATH_SUFFIXES lib64)

if(NOT ASCEND_OP_COMPILER_LD_LIBRARIE)
  message(
    FATAL_ERROR
      "ASCEND_OP_COMPILER_LD_LIBRARIE Ascend lib(acl_op_compiler) is not found. Please set ASCEND_TOOLKIT_HOME to specify the search path."
  )
endif()

# Locate the HCCL library (hccl) and store its full path in the cache variable
# ASCEND_HCCL_LD_LIBRARIE. Configuration stops with a fatal error when the
# library cannot be found.
#
# LD_LIBRARY_PATH is a native (colon-separated on Unix) search path, so it is
# converted to a CMake list before being handed to find_library().
file(TO_CMAKE_PATH "$ENV{LD_LIBRARY_PATH}" ascend_env_library_dirs)

find_library(
  ASCEND_HCCL_LD_LIBRARIE
  NAMES hccl
  # PATH_SUFFIXES makes each entry be searched as <dir>/lib64 and <dir>, using the
  # resolved toolkit root rather than the possibly unset environment variable.
  PATHS "${ASCEND_TOOLKIT_HOME}" ${ascend_env_library_dirs}
  PATH_SUFFIXES lib64)

if(NOT ASCEND_HCCL_LD_LIBRARIE)
  message(
    FATAL_ERROR
      "ASCEND_HCCL_LD_LIBRARIE Ascend lib(hccl) is not found. Please set ASCEND_TOOLKIT_HOME to specify the search path."
  )
endif()

# Publish the two result variables promised in the header of this file.
# ASCEND_INCLUDE_DIRS is re-exported as a normal variable holding the value found
# by find_path(); ASCEND_LIBRARIES is assembled in link order: ascendcl, hccl,
# acl_op_compiler.
set(ASCEND_INCLUDE_DIRS ${ASCEND_INCLUDE_DIRS})

set(ASCEND_LIBRARIES ${ASCEND_LD_LIBRARIES})
list(APPEND ASCEND_LIBRARIES ${ASCEND_HCCL_LD_LIBRARIE})
list(APPEND ASCEND_LIBRARIES ${ASCEND_OP_COMPILER_LD_LIBRARIE})

# Echo the final values so they are visible in the configure log.
message(STATUS "Ascend: ASCEND_INCLUDE_DIRS = ${ASCEND_INCLUDE_DIRS}")
message(STATUS "Ascend: ASCEND_LIBRARIES = ${ASCEND_LIBRARIES}")
6 changes: 6 additions & 0 deletions cmake/oneflow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,12 @@ add_definitions(-DONEFLOW_BINARY_DIR="${PROJECT_BINARY_DIR}")

include(op_schema)

# When the Ascend NPU backend is enabled, locate the toolkit and attach its
# headers and libraries to the `oneflow` target.
if(BUILD_NPU)
# Sets ASCEND_INCLUDE_DIRS and ASCEND_LIBRARIES, or stops configuration with a
# fatal error when the headers or one of the libraries cannot be found.
include(${PROJECT_SOURCE_DIR}/cmake/ascend_npu.cmake)
target_include_directories(oneflow PRIVATE ${ASCEND_INCLUDE_DIRS})
# NOTE(review): keyword-less signature. Adding PRIVATE/PUBLIC here is only valid
# if every other target_link_libraries(oneflow ...) call also uses a keyword --
# confirm against the rest of this file before changing it.
target_link_libraries(oneflow ${ASCEND_LIBRARIES})
endif()

get_property(EXTERNAL_TARGETS GLOBAL PROPERTY EXTERNAL_TARGETS)

if(APPLE)
Expand Down
150 changes: 150 additions & 0 deletions oneflow/core/profiler/acl_profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if defined(WITH_NPU)
#include <cstdlib>
#include <iostream>
#include <string>
#include "oneflow/core/profiler/acl_profiler.h"

namespace oneflow {
namespace profiler {

// Lookup table from the textual name of an AI Core metric (the value stored in
// NpuTraceConfig::metrics) to the matching aclprofAicoreMetrics enumerator.
// Consulted by AclPrepareTrace().
// NOTE(review): this is a mutable namespace-scope object; AclPrepareTrace()
// indexes it with operator[], so it cannot be made const without changing that
// call site as well.
std::map<std::string, aclprofAicoreMetrics> npu_metrics_map_ = {
{"ACL_AICORE_PIPE_UTILIZATION", ACL_AICORE_PIPE_UTILIZATION},
{"ACL_AICORE_ARITHMETIC_UTILIZATION", ACL_AICORE_ARITHMETIC_UTILIZATION},
{"ACL_AICORE_MEMORY_BANDWIDTH", ACL_AICORE_MEMORY_BANDWIDTH},
{"ACL_AICORE_L0B_AND_WIDTH", ACL_AICORE_L0B_AND_WIDTH},
{"ACL_AICORE_RESOURCE_CONFLICT_RATIO", ACL_AICORE_RESOURCE_CONFLICT_RATIO},
{"ACL_AICORE_MEMORY_UB", ACL_AICORE_MEMORY_UB},
{"ACL_AICORE_L2_CACHE", ACL_AICORE_L2_CACHE},
{"ACL_AICORE_NONE", ACL_AICORE_NONE},
};

// Lookup table from the textual trace level (the value stored in
// NpuTraceConfig::trace_level) to the data-type bitmask constants Level0, Level1,
// Level2 and Level_none declared in acl_profiler.h. Consulted by AclPrepareTrace(),
// which falls back to Level0 for names that are not listed here.
std::map<std::string, uint64_t> trace_level_map_ = {
{"Level0", Level0},
{"Level1", Level1},
{"Level2", Level2},
{"Level_none", Level_none},
};

// Forwards to aclprofInit().
// profilerResultPath: path string handed to ACL; length: number of characters in it.
// Returns the aclError reported by aclprofInit() unchanged.
aclError AclProfilingInit(const char* profilerResultPath, size_t length) {
return aclprofInit(profilerResultPath, length);
}

// Forwards to aclprofStart() with the given profiling configuration and returns
// its aclError unchanged.
aclError AclProfilingStart(const aclprofConfig* profilerConfig) {
return aclprofStart(profilerConfig);
}

// Forwards to aclprofStop() with the given profiling configuration and returns
// its aclError unchanged.
aclError AclProfilingStop(const aclprofConfig* profilerConfig) {
return aclprofStop(profilerConfig);
}

// Forwards to aclprofFinalize() and returns its aclError unchanged.
aclError AclProfilingFinalize() { return aclprofFinalize(); }

// Forwards all arguments to aclprofCreateConfig() and returns the resulting
// configuration pointer unchanged.
// deviceIdList / deviceNums: array of device ids and its element count.
// aicoreMetrics / aicoreEvents: AI Core metric selection and event list
// (AclPrepareTrace() passes nullptr for the events).
// dataTypeConfig: bitmask of ACL_PROF_* flags selecting what is collected.
aclprofConfig* AclProfilingCreateConfig(uint32_t* deviceIdList, uint32_t deviceNums,
aclprofAicoreMetrics aicoreMetrics,
aclprofAicoreEvents* aicoreEvents,
uint64_t dataTypeConfig) {
return aclprofCreateConfig(deviceIdList, deviceNums, aicoreMetrics, aicoreEvents, dataTypeConfig);
}

// Forwards to aclprofSetConfig().
// configType: which profiling option to set; config / configLength: the option
// value as a character buffer and its length.
// Returns the aclError reported by aclprofSetConfig() unchanged.
aclError AclprofSetConfig(aclprofConfigType configType, const char* config, size_t configLength) {
return aclprofSetConfig(configType, config, configLength);
}

// Forwards to aclprofDestroyConfig() for a configuration obtained from
// AclProfilingCreateConfig() and returns its aclError unchanged.
aclError AclProfilingDestroyConfig(const aclprofConfig* profilerConfig) {
return aclprofDestroyConfig(profilerConfig);
}

// Initializes ACL profiling and builds the configuration later passed to
// AclStartTrace() and AclReleaseTrace().
//
// The profiling result directory is read from the ASCEND_PROFILER_LOG_DIR
// environment variable, falling back to ASCEND_TOOLKIT_HOME.
//
// Returns the pointer produced by aclprofCreateConfig(), or nullptr when neither
// environment variable is set (nothing is initialized in that case).
aclprofConfig* AclPrepareTrace() {
  // ref: torch_npu/csrc/profiler/profiler_mgr.cpp
  const char* result_dir = std::getenv("ASCEND_PROFILER_LOG_DIR");
  if (result_dir == nullptr) { result_dir = std::getenv("ASCEND_TOOLKIT_HOME"); }
  if (result_dir == nullptr) {
    // Constructing std::string from a null pointer is undefined behaviour, so bail out.
    LOG(ERROR) << "ProfileManager npu AclPrepareTrace() failed: "
               << "neither ASCEND_PROFILER_LOG_DIR nor ASCEND_TOOLKIT_HOME is set.";
    return nullptr;
  }
  const std::string result_path(result_dir);
  const aclError init_ret = AclProfilingInit(result_path.c_str(), result_path.size());
  if (init_ret != ACL_SUCCESS) {
    // Keep going as before, but make the failure visible in the log.
    LOG(WARNING) << "ProfileManager npu AclProfilingInit() failed, error code: " << init_ret;
  }

  // Fixed settings, mirroring:
  // torch_npu/profiler/profiler.py
  // torch_npu/profiler/experimental_config.py
  const NpuTraceConfig npu_config = {
      /*trace_level*/ "Level2", /*metrics*/ "ACL_AICORE_PIPE_UTILIZATION",
      /*npu_memory*/ true,      /*l2_cache*/ false,
      /*record_op_args*/ true,
      /*msprof_tx*/ true,       /*op_attr*/ false};

  // Unknown trace levels fall back to Level0.
  const auto level_iter = trace_level_map_.find(npu_config.trace_level);
  uint64_t datatype_config = (level_iter == trace_level_map_.end()) ? Level0 : level_iter->second;

  // AI Core metrics are only requested for a known metric other than "none".
  aclprofAicoreMetrics aic_metrics = ACL_AICORE_NONE;
  const auto metrics_iter = npu_metrics_map_.find(npu_config.metrics);
  if (metrics_iter != npu_metrics_map_.end() && npu_config.metrics != "ACL_AICORE_NONE") {
    datatype_config |= ACL_PROF_AICORE_METRICS;
    aic_metrics = metrics_iter->second;
  }

  if (npu_config.l2_cache) { datatype_config |= ACL_PROF_L2CACHE; }
  if (npu_config.msprof_tx) { datatype_config |= ACL_PROF_MSPROFTX; }
  if (npu_config.npu_memory) {
    datatype_config |= ACL_PROF_TASK_MEMORY;
    const std::string freq = "50";
    const auto prof_ret =
        AclprofSetConfig(ACL_PROF_SYS_HARDWARE_MEM_FREQ, freq.c_str(), freq.size());
    if (prof_ret == ACL_ERROR_PROF_MODULES_UNSUPPORTED) {
      LOG(WARNING) << "ProfileManager npu AclprofSetConfig() failed: "
                   << "not support to set config for sys-hardware-mem.";
    }
  }
  // op_attr=true has bug
  if (npu_config.op_attr) { datatype_config |= ACL_PROF_OP_ATTR; }

  // TODO: query the current local device instead of hard-coding device 0.
  const uint32_t deviceId = 0;
  const uint32_t deviceNum = 1;
  uint32_t deviceIdList[deviceNum] = {deviceId};
  return AclProfilingCreateConfig(deviceIdList, deviceNum, aic_metrics, nullptr, datatype_config);
}

// Starts profiling with a configuration obtained from AclPrepareTrace() by
// forwarding to AclProfilingStart(); returns its aclError unchanged.
aclError AclStartTrace(aclprofConfig* profConfig) { return AclProfilingStart(profConfig); }

// Stops profiling, destroys the configuration created by AclPrepareTrace() and
// finalizes ACL profiling.
//
// profConfig: configuration returned by AclPrepareTrace(); may be nullptr, in
// which case only the device synchronization and the finalize step run.
//
// Failures of the individual steps are logged and do not abort the sequence, so
// aclprofFinalize() is always reached and profiling is not left initialized.
void AclReleaseTrace(aclprofConfig* profConfig) {
  // Wait for outstanding device work before stopping collection.
  const aclError sync_ret = aclrtSynchronizeDevice();
  if (sync_ret != ACL_SUCCESS) {
    LOG(WARNING) << "ProfileManager npu AclReleaseTrace() failed: "
                 << "aclrtSynchronizeDevice fail, error code: " << sync_ret;
  }

  if (profConfig != nullptr) {
    // stop
    const aclError stop_ret = AclProfilingStop(profConfig);
    if (stop_ret != ACL_SUCCESS) {
      LOG(WARNING) << "ProfileManager npu AclReleaseTrace() failed: "
                   << "AclProfilingStop fail, error code: " << stop_ret;
    }
    const aclError destroy_ret = AclProfilingDestroyConfig(profConfig);
    if (destroy_ret != ACL_SUCCESS) {
      LOG(WARNING) << "ProfileManager npu AclReleaseTrace() failed: "
                   << "AclProfDestoryConfig fail, error code: " << destroy_ret;
    }
  }

  // finalize
  const aclError finalize_ret = AclProfilingFinalize();
  if (finalize_ret != ACL_SUCCESS) {
    LOG(WARNING) << "ProfileManager npu AclReleaseTrace() failed: "
                 << "AclProfilingFinalize fail, error code: " << finalize_ret;
  }
}

} // namespace profiler
} // namespace oneflow

#endif // WITH_NPU
62 changes: 62 additions & 0 deletions oneflow/core/profiler/acl_profiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef ONEFLOW_CORE_PROFILER_ACL_PROFILER_H_
#define ONEFLOW_CORE_PROFILER_ACL_PROFILER_H_

#if defined(WITH_NPU)
#include <cstddef>
#include <cstdint>
#include <map>
#include <string>
#include <glog/logging.h>
#include "acl/acl.h"
#include "acl/acl_prof.h"

// Data-type flag used by AclPrepareTrace() when NpuTraceConfig::op_attr is set.
// Only defined here when the toolkit headers above do not already provide it.
#ifndef ACL_PROF_OP_ATTR
#define ACL_PROF_OP_ATTR 0x00004000ULL
#endif  // ACL_PROF_OP_ATTR

namespace oneflow {
namespace profiler {

// Trace levels: bitmasks of ACL_PROF_* flags used as the base `dataTypeConfig`
// value passed to aclprofCreateConfig(). Each higher level includes more flags.
constexpr uint64_t Level_none = 0;
constexpr uint64_t Level0 = ACL_PROF_TASK_TIME_L0 | ACL_PROF_ACL_API;
constexpr uint64_t Level1 =
    ACL_PROF_TASK_TIME | ACL_PROF_ACL_API | ACL_PROF_HCCL_TRACE | ACL_PROF_AICORE_METRICS;
constexpr uint64_t Level2 = Level1 | ACL_PROF_RUNTIME_API | ACL_PROF_AICPU;

// Settings consumed by AclPrepareTrace() to assemble the profiling configuration.
// The member order is part of the interface: the struct is aggregate-initialized.
struct NpuTraceConfig {
  std::string trace_level;  // name of a trace level: "Level0", "Level1", "Level2", "Level_none"
  std::string metrics;      // name of an aclprofAicoreMetrics value, e.g. "ACL_AICORE_NONE"
  bool npu_memory;          // adds ACL_PROF_TASK_MEMORY and sets the memory sampling frequency
  bool l2_cache;            // adds ACL_PROF_L2CACHE
  bool record_op_args;      // NOTE(review): not read by AclPrepareTrace() -- confirm intended use
  bool msprof_tx;           // adds ACL_PROF_MSPROFTX
  bool op_attr;             // adds ACL_PROF_OP_ATTR
};

// Thin wrappers that forward to the aclprof* function of the matching name and
// return its result unchanged.
aclError AclProfilingInit(const char* profilerResultPath, size_t length);
aclError AclProfilingStart(const aclprofConfig* profilerConfig);
aclError AclProfilingStop(const aclprofConfig* profilerConfig);
aclError AclProfilingFinalize();
aclprofConfig* AclProfilingCreateConfig(uint32_t* deviceIdList, uint32_t deviceNums,
                                        aclprofAicoreMetrics aicoreMetrics,
                                        aclprofAicoreEvents* aicoreEvents, uint64_t dataTypeConfig);
aclError AclprofSetConfig(aclprofConfigType configType, const char* config, size_t configLength);
aclError AclProfilingDestroyConfig(const aclprofConfig* profilerConfig);

// High-level trace lifecycle: AclPrepareTrace() initializes profiling and returns
// a configuration, AclStartTrace() starts collection with it, and
// AclReleaseTrace() stops collection, destroys the configuration and finalizes.
aclprofConfig* AclPrepareTrace();
aclError AclStartTrace(aclprofConfig* profConfig);
void AclReleaseTrace(aclprofConfig* profConfig);

}  // namespace profiler
}  // namespace oneflow

#endif  // WITH_NPU

#endif  // ONEFLOW_CORE_PROFILER_ACL_PROFILER_H_
35 changes: 29 additions & 6 deletions oneflow/core/profiler/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,36 @@ void IEvent::SetStartedAt(double t) { started_at_ = t; }

void IEvent::SetFinishedAt(double t) { finished_at_ = t; }

void IEvent::Start() { SetStartedAt(GetTimeNow()); }
// Records the current time as the start of this event. The flag handed to
// GetTimeNow() is true in WITH_NPU builds and false otherwise.
void IEvent::Start() {
#ifdef WITH_NPU
  constexpr bool kNpuBuild = true;
#else
  constexpr bool kNpuBuild = false;
#endif
  const auto now = GetTimeNow(kNpuBuild);
  SetStartedAt(now);
}

void IEvent::Finish() { SetFinishedAt(GetTimeNow()); }
// Records the current time as the end of this event. The flag handed to
// GetTimeNow() is true in WITH_NPU builds and false otherwise.
void IEvent::Finish() {
#ifdef WITH_NPU
  constexpr bool kNpuBuild = true;
#else
  constexpr bool kNpuBuild = false;
#endif
  const auto now = GetTimeNow(kNpuBuild);
  SetFinishedAt(now);
}

// Returns true when this event's [start, finish] interval lies inside that of
// `e`. A null `e` or `e == this` is never a parent.
//
// Both events are compared in the same explicit time unit: nanoseconds in
// WITH_NPU builds, microseconds otherwise. The earlier unit-less return that
// preceded this comparison made it unreachable and has been removed.
bool IEvent::IsChildOf(const IEvent* e) {
  if (!e) { return false; }
  if (this == e) { return false; }
#ifdef WITH_NPU
  const auto time_unit = EventTimeUnit::kNS;
#else
  const auto time_unit = EventTimeUnit::kUS;
#endif
  return GetStartedAt<double>(time_unit) >= e->GetStartedAt<double>(time_unit)
         && GetFinishedAt<double>(time_unit) <= e->GetFinishedAt<double>(time_unit);
}

const std::string& IEvent::GetName() const { return name_; }
Expand All @@ -60,10 +81,12 @@ nlohmann::json KernelEvent::ToJson() {
for (const auto& desc : description_) {
j["description"][desc.first] = {desc.second.first, desc.second.second};
}
#if defined(WITH_CUDA)
#if defined(WITH_CUDA) || defined(WITH_NPU)
#ifdef WITH_CUDA
j["memory_size"] = memory_size_;
if (!children_.empty()) { j["children"] = children_; }
#endif // WITH_CUDA
if (!children_.empty()) { j["children"] = children_; }
#endif // WITH_CUDA || WITH_NPU
return j;
}

Expand Down
Loading