Skip to content

Commit

Permalink
Merge pull request #74 from InfiniTensor/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
YdrMaster authored Feb 19, 2024
2 parents 54c2f7e + 3cb6f5d commit a813939
Show file tree
Hide file tree
Showing 205 changed files with 5,037 additions and 584 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@
[submodule "src/09python_ffi/pybind11"]
path = src/09python_ffi/pybind11
url = [email protected]:pybind/pybind11.git
[submodule "3rd-party/cccl"]
path = 3rd-party/cccl
url = [email protected]:NVIDIA/cccl.git
2 changes: 1 addition & 1 deletion 3rd-party/backward-cpp
1 change: 1 addition & 0 deletions 3rd-party/cccl
Submodule cccl added at b7d422
34 changes: 34 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
option(ABSL_PROPAGATE_CXX_STD "Abseil need this option" ON)
option(USE_CUDA "Support Nvidia GPU" OFF)
option(USE_KUNLUN "Support Baidu Kunlunxin" OFF)
option(USE_BANG "Support Hanwuji MLU" OFF)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
Expand Down Expand Up @@ -41,6 +42,38 @@ if(USE_KUNLUN)
message(STATUS "KUNLUN_HOME: ${KUNLUN_HOME}")
endif()

if (USE_BANG)
add_compile_definitions(USE_BANG)
include_directories(src/kernels/mlu/include)

# Neuware Evironment
if ((NOT DEFINED NEUWARE_HOME) AND (NOT DEFINED ENV{NEUWARE_HOME}))
message(FATAL_ERROR "NEUWARE_HOME is not defined from cmake or env")
elseif (DEFINED NEUWARE_HOME)
set(NEUWARE_HOME ${NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development")
else()
set(NEUWARE_HOME $ENV{NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development")
endif()
message(STATUS "NEUWARE_HOME: ${NEUWARE_HOME}")

# cnrt cndrv cnnl
include_directories("${NEUWARE_HOME}/include")
find_library(CAMBRICON_CNNL libcnnl.so "${NEUWARE_HOME}/lib64")
find_library(CAMBRICON_CNRT libcnrt.so "${NEUWARE_HOME}/lib64")
find_library(CAMBRICON_CNDRV libcndrv.so "${NEUWARE_HOME}/lib64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++ -Wall")

if ((NOT DEFINED TARGET_CPU_ARCH) AND (NOT DEFINED ENV{TARGET_CPU_ARCH}))
execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE)
set(TARGET_CPU_ARCH "${_uname_m}" CACHE STRING "Target CPU ARCH")
elseif(DEFINED TARGET_CPU_ARCH)
set(TARGET_CPU_ARCH ${TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH")
else()
set(TARGET_CPU_ARCH $ENV{TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH")
endif()
message(STATUS "TARGET_CPU_ARCH: ${TARGET_CPU_ARCH}")
endif()

add_compile_options(-march=native) # this will cause error in some machine
add_compile_options(-mtune=native)
add_compile_options(-Wall)
Expand Down Expand Up @@ -72,4 +105,5 @@ add_subdirectory(src/05computation)
add_subdirectory(src/06frontend)
add_subdirectory(src/07onnx)
add_subdirectory(src/08communication)
add_subdirectory(src/08-01llm)
add_subdirectory(src/09python_ffi)
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
TYPE ?= Debug
CUDA ?= OFF
KUNLUN ?= OFF
BANG ?= OFF

CMAKE_EXTRA =
# CMAKE_EXTRA += -DCMAKE_CXX_COMPILER=

build:
mkdir -p build
cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) $(CMAKE_EXTRA)
cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) -DUSE_BANG=$(BANG) $(CMAKE_EXTRA)
make -j -C build

install-python: build
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ executor = compiler.compile("cuda", "default", []) # -------- 编译模型
- [fmt 10.1.1](https://github.com/fmtlib/fmt/releases/tag/10.1.0)
- [fmtlog v2.2.1](https://github.com/MengRao/fmtlog/releases/tag/v2.2.1)
- [googletest v1.14.0](https://github.com/google/googletest/releases/tag/v1.14.0)
- [backward-cpp v1.6](https://github.com/bombela/backward-cpp/releases/tag/v1.6)
- [backward-cpp master](https://github.com/bombela/backward-cpp)
- [result master](https://github.com/willowell/result)
- [abseil-cpp 20230802.1](https://github.com/abseil/abseil-cpp/releases/tag/20230802.1)

Expand Down
3 changes: 1 addition & 2 deletions src/00common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ file(GLOB_RECURSE COMMON_TEST test/*.cpp)
if(COMMON_TEST)
add_executable(common_test ${COMMON_TEST})
add_test(common_test common_test)
target_link_libraries(common_test common GTest::gtest_main ${BACKWARD_ENABLE})
add_backward(common_test)
target_link_libraries(common_test common GTest::gtest_main Backward::Object)
endif()
6 changes: 3 additions & 3 deletions src/00common/include/common/error_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ namespace refactor {
std::abort()

#ifndef DISABLE_ASSERT
#define ASSERT(CONDITION, F, ...) \
{ \
if (!(CONDITION)) RUNTIME_ERROR(fmt::format("Assertion: " #F, ##__VA_ARGS__)); \
#define ASSERT(CONDITION, F, ...) \
{ \
if (!(CONDITION)) RUNTIME_ERROR(fmt::format("Assertion: " F, ##__VA_ARGS__)); \
}
#else
#define ASSERT(CONDITION, F)
Expand Down
3 changes: 2 additions & 1 deletion src/00common/include/common/rc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define RC_HPP

#include <functional>
#include <utility>

namespace refactor {

Expand All @@ -18,7 +19,7 @@ namespace refactor {
T *_value;
struct Counter {
size_t strong, weak;
} * _counter;
} *_counter;

Rc(T *ptr, Counter *counter) noexcept
: _value(ptr), _counter(counter) { inc(); }
Expand Down
3 changes: 1 addition & 2 deletions src/01graph_topo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ file(GLOB_RECURSE GRAPH_TOPO_TEST test/*.cpp)
if(GRAPH_TOPO_TEST)
add_executable(graph_topo_test ${GRAPH_TOPO_TEST})
add_test(graph_topo_test graph_topo_test)
target_link_libraries(graph_topo_test graph_topo GTest::gtest_main ${BACKWARD_ENABLE})
add_backward(graph_topo_test)
target_link_libraries(graph_topo_test graph_topo GTest::gtest_main Backward::Object)
endif()
3 changes: 1 addition & 2 deletions src/02hardware/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@ file(GLOB_RECURSE HARDWARE_TEST test/*.cpp)
if(HARDWARE_TEST)
add_executable(hardware_test ${HARDWARE_TEST})
add_test(hardware_test hardware_test)
target_link_libraries(hardware_test hardware GTest::gtest_main ${BACKWARD_ENABLE})
add_backward(hardware_test)
target_link_libraries(hardware_test hardware GTest::gtest_main Backward::Object)
endif()
2 changes: 2 additions & 0 deletions src/02hardware/include/hardware/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ namespace refactor::hardware {
enum class Type : int32_t {
Cpu,
Nvidia,
Mlu,
Kunlun,
};

protected:
Expand Down
19 changes: 19 additions & 0 deletions src/02hardware/include/hardware/devices/mlu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef HARDWARE_DEVICES_MLU_H
#define HARDWARE_DEVICES_MLU_H

#include "../device.h"

namespace refactor::hardware {

    // Cambricon MLU accelerator device.
    // The constructor builds the device-side memory pool for the given card
    // (see devices/mlu/device.cc).
    class Mlu final : public Device {
    public:
        // `card`: zero-based MLU card index.
        explicit Mlu(int32_t card);
        // Selects this card as the current device when built with USE_BANG;
        // no-op otherwise.
        // NOTE(review): declared noexcept, but the USE_BANG implementation
        // calls setDevice, which asserts (throws) on CNRT failure — inside a
        // noexcept function that would terminate. Confirm intended.
        void setContext() const noexcept final;
        Type type() const noexcept final {
            return Type::Mlu;
        }
    };

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_H
2 changes: 2 additions & 0 deletions src/02hardware/src/device_manager.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "hardware/device_manager.h"
#include "hardware/devices/cpu.h"
#include "hardware/devices/mlu.h"
#include "hardware/devices/nvidia.h"

namespace refactor::hardware::device {
Expand Down Expand Up @@ -37,6 +38,7 @@ namespace refactor::hardware::device {
using T = Device::Type;
// clang-format off
auto device = type == T::Nvidia ? std::make_shared<Nvidia>(card)
: type == T::Mlu ? std::make_shared<Mlu>(card)
: UNREACHABLEX(Arc<Device>, "");
// clang-format on
auto [kind, ok] = DEVICES.try_emplace(static_cast<int32_t>(type));
Expand Down
10 changes: 5 additions & 5 deletions src/02hardware/src/devices/cpu/memory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@
namespace refactor::hardware {
using M = CpuMemory;

void *M::malloc(size_t size) noexcept {
void *M::malloc(size_t size) {
return std::malloc(size);
}
void M::free(void *ptr) noexcept {
void M::free(void *ptr) {
std::free(ptr);
}
void *M::copyHD(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}
void *M::copyDH(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}
void *M::copyDD(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}

Expand Down
10 changes: 5 additions & 5 deletions src/02hardware/src/devices/cpu/memory.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
namespace refactor::hardware {

class CpuMemory final : public Memory {
void *malloc(size_t) noexcept final;
void free(void *) noexcept final;
void *copyHD(void *dst, void const *src, size_t bytes) const noexcept final;
void *copyDH(void *dst, void const *src, size_t bytes) const noexcept final;
void *copyDD(void *dst, void const *src, size_t bytes) const noexcept final;
void *malloc(size_t) final;
void free(void *) final;
void *copyHD(void *dst, void const *src, size_t bytes) const final;
void *copyDH(void *dst, void const *src, size_t bytes) const final;
void *copyDD(void *dst, void const *src, size_t bytes) const final;
};

}// namespace refactor::hardware
Expand Down
33 changes: 33 additions & 0 deletions src/02hardware/src/devices/mlu/device.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "functions.hh"
#include "hardware/devices/mlu.h"
#include "hardware/mem_pool.h"
#include "memory.hh"

namespace refactor::hardware {

    // Builds the memory pool backing one MLU card:
    // pool size = min(free, max(5 GiB, 80% of total)), 256-byte alignment.
    // Returns nullptr when compiled without USE_BANG (device then has no pool).
    static Arc<Memory> bangMemory(int32_t card) {
#ifdef USE_BANG
        ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
        setDevice(card);
        auto [free, total] = getMemInfo();
        // Use an explicit size_t for the 5 GiB floor: `5ul << 30` overflows
        // 32-bit `unsigned long` on LLP64 platforms and would not match the
        // type of `total * 4 / 5` in std::max.
        auto size = std::min(free, std::max(static_cast<size_t>(5) << 30, total * 4 / 5));
        fmt::println("initializing Cambricon MLU {}, memory {} / {}, alloc {}",
                     card, free, total, size);
        return std::make_shared<MemPool>(
            std::make_shared<MluMemory>(),
            size,
            256ul);
#else
        return nullptr;
#endif
    }

    Mlu::Mlu(int32_t card) : Device(card, bangMemory(card)) {}

    void Mlu::setContext() const noexcept {
#ifdef USE_BANG
        // NOTE(review): setDevice asserts (throws) on CNRT failure; inside a
        // noexcept function that terminates the process — confirm intended.
        setDevice(_card);
#endif
    }

}// namespace refactor::hardware
21 changes: 21 additions & 0 deletions src/02hardware/src/devices/mlu/functions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "functions.hh"

namespace refactor::hardware {

#ifdef USE_BANG
    // Number of MLU cards visible to the CNRT runtime.
    int getDeviceCount() {
        unsigned count = 0;
        BANG_ASSERT(cnrtGetDeviceCount(&count));
        return static_cast<int>(count);
    }
    // Selects `device` as the current MLU card (cnrtSetDevice).
    void setDevice(int device) {
        BANG_ASSERT(cnrtSetDevice(device));
    }
    // Queries free/total memory of the current card (cnrtMemGetInfo).
    MemInfo getMemInfo() {
        MemInfo info{};
        BANG_ASSERT(cnrtMemGetInfo(&info.free, &info.total));
        return info;
    }
#endif

}// namespace refactor::hardware
28 changes: 28 additions & 0 deletions src/02hardware/src/devices/mlu/functions.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
#define HARDWARE_DEVICES_MLU_FUNCTIONS_CUH

#include "common.h"

#ifdef USE_BANG
#include "cnrt.h"

// Checks a CNRT status code; on failure raises a runtime error carrying the
// failing expression, the CNRT error string, and the numeric status.
#define BANG_ASSERT(STATUS)                                                          \
    if (auto status = (STATUS); status != CNRT_RET_SUCCESS) {                        \
        RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
                                  cnrtGetErrorStr(status), (int) status));           \
    }
#endif

namespace refactor::hardware {

    // Free/total device memory as reported by getMemInfo.
    struct MemInfo {
        size_t free, total;
    };

    int getDeviceCount();      // number of MLU cards
    void setDevice(int device);// select current MLU card
    MemInfo getMemInfo();      // memory info of the current card

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
33 changes: 33 additions & 0 deletions src/02hardware/src/devices/mlu/memory.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "memory.hh"
#include "functions.hh"

namespace refactor::hardware {
#ifdef USE_BANG
    using M = MluMemory;

    // Allocates `size` bytes of MLU device memory.
    void *M::malloc(size_t size) {
        void *ptr;
        BANG_ASSERT(cnrtMalloc(&ptr, size));
        return ptr;
    }
    // Releases device memory previously returned by malloc.
    void M::free(void *ptr) {
        BANG_ASSERT(cnrtFree(ptr));
    }
    // Host -> device copy; returns `dst`.
    void *M::copyHD(void *dst, void const *src, size_t bytes) const {
        // Consistency fix: terminate the assertion with `;` like the siblings.
        BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
                               CNRT_MEM_TRANS_DIR_HOST2DEV));
        return dst;
    }
    // Device -> host copy; returns `dst`.
    void *M::copyDH(void *dst, void const *src, size_t bytes) const {
        BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
                               CNRT_MEM_TRANS_DIR_DEV2HOST));
        return dst;
    }
    // Device -> device copy; returns `dst`.
    // NOTE(review): uses PEER2PEER — confirm against CNRT docs that this is
    // correct for copies within a single card (vs. a dev-to-dev direction).
    void *M::copyDD(void *dst, void const *src, size_t bytes) const {
        BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
                               CNRT_MEM_TRANS_DIR_PEER2PEER));
        return dst;
    }
#endif

}// namespace refactor::hardware
18 changes: 18 additions & 0 deletions src/02hardware/src/devices/mlu/memory.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef HARDWARE_DEVICES_MLU_MEMORY_CUH
#define HARDWARE_DEVICES_MLU_MEMORY_CUH

#include "hardware/memory.h"

namespace refactor::hardware {

    // Memory implementation backed by Cambricon CNRT allocation/copy calls
    // (see devices/mlu/memory.cc); meaningful only when built with USE_BANG.
    class MluMemory final : public Memory {
        void *malloc(size_t) final;
        void free(void *) final;
        void *copyHD(void *dst, void const *src, size_t bytes) const final;// host -> device
        void *copyDH(void *dst, void const *src, size_t bytes) const final;// device -> host
        void *copyDD(void *dst, void const *src, size_t bytes) const final;// device -> device
    };

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_MEMORY_CUH
6 changes: 3 additions & 3 deletions src/02hardware/src/devices/nvidia/device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ namespace refactor::hardware {

size_t free, total;
CUDA_ASSERT(cudaMemGetInfo(&free, &total));
auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
auto size = free * 9 / 10;
cudaDeviceProp prop;
CUDA_ASSERT(cudaGetDeviceProperties(&prop, 0));
CUDA_ASSERT(cudaGetDeviceProperties(&prop, card));
size_t alignment = prop.textureAlignment;
fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}, alignment {}",
card, free, total, size, alignment);
Expand All @@ -34,7 +34,7 @@ namespace refactor::hardware {
size,
alignment);
#else
RUNTIME_ERROR("CUDA is not enabled");
return nullptr;
#endif
}

Expand Down
3 changes: 1 addition & 2 deletions src/03runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ file(GLOB_RECURSE RUNTIME_TEST test/*.cpp)
if(RUNTIME_TEST)
add_executable(runtime_test ${RUNTIME_TEST})
add_test(runtime_test runtime_test)
target_link_libraries(runtime_test runtime GTest::gtest_main ${BACKWARD_ENABLE})
add_backward(runtime_test)
target_link_libraries(runtime_test runtime GTest::gtest_main Backward::Object)
endif()
Loading

0 comments on commit a813939

Please sign in to comment.