Skip to content

Feature/multiprocess #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 33 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
d17efa7
Working multiprocess solution
jishminor May 25, 2023
31384e5
Support papi op profiling per model
jishminor May 30, 2023
5ac8615
Use cma channel first, and improve error handling
jishminor May 30, 2023
6344cfd
Make parent process handle the connection listen
jishminor May 30, 2023
53adc1a
Fix build.yml
jishminor May 31, 2023
64f6c81
Fix cleanup of child process
jishminor May 31, 2023
81eb50a
Handle model execution failure correctly
jishminor May 31, 2023
c1c81fd
Improve error handling
jishminor May 31, 2023
a94b986
Fix issue with MMap input tensors in models
jishminor Jun 2, 2023
7a6ab0c
Improve efficiency of model instance message passing
jishminor Jun 2, 2023
c7c5e59
Fix bug in model instance tensor allocation
jishminor Jun 6, 2023
20eeff4
Use papi low level api
jishminor Jun 7, 2023
35fb954
Fix csv generation script
jishminor Jun 8, 2023
06ec744
Append utc time to csv file name
jishminor Jun 8, 2023
0c87cbb
Wait for model load message before claiming model is ready
jishminor Jun 8, 2023
53acc26
Don't worry about papi hl
jishminor Jun 8, 2023
8b0e28e
Add support for model NUMA policies
jishminor Jun 9, 2023
adf2c0b
Fix non numa build
jishminor Jun 12, 2023
40c32d1
Add time to profiling data by default
jishminor Jun 12, 2023
c81b65c
Explicitly call terminate on child process
jishminor Jun 12, 2023
a939d81
Put timeout on cleanup
jishminor Jun 12, 2023
d90d739
Fix exit handling in child process
jishminor Jun 12, 2023
c08cd3b
Add support for uncore papi events
jishminor Jul 27, 2023
7bfade4
Simplify perf counter infra
jishminor Jul 28, 2023
0ba5134
Add sample id to csv file, and fix csv gen
jishminor Jul 28, 2023
9737271
Only keep one copy of op timings in csv
jishminor Aug 2, 2023
9096e59
Add function to get list of avail cpus per socket
jishminor Aug 17, 2023
3caac11
Fix bug in validating arbitrary batch sizes
jishminor Aug 18, 2023
aba3fa3
Implement thread pinning feature
jishminor Aug 22, 2023
7367f6b
Give back threads to avail cpus
jishminor Aug 22, 2023
0b47640
Fix issue with model unloading
jishminor Aug 22, 2023
5c7ff58
Add flag to control thread pinning
jishminor Aug 23, 2023
6e875e4
Fix thread pinning strat
jishminor Aug 23, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ jobs:
-DTRITON_BACKEND_REPO_TAG=${{env.TRITON_REPO_TAG}} \
-DTRITON_CORE_REPO_TAG=${{env.TRITON_REPO_TAG}} \
-DTRITON_COMMON_REPO_TAG=${{env.TRITON_REPO_TAG}} \
-PAPI_PROFILING_ENABLE=ON \
-DPAPI_PROFILING_ENABLE=ON \
-DTRITON_ENABLE_MALI_GPU=${{env.TRITON_ENABLE_MALI_GPU}} \
-DTFLITE_ENABLE_RUY=${{env.TFLITE_ENABLE_RUY}} \
-DTFLITE_BAZEL_BUILD=${{env.TFLITE_BAZEL_BUILD}} \
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@
/.devcontainer
/**/triton_qa_models
/**/armnn_tflite_backend_triton_model_repo.tar.gz
**/papi_hl_output*
*.csv

240 changes: 193 additions & 47 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

set(CMAKE_CXX_STANDARD 17)

SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")

set(TARGET_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR})

# Triton Options
Expand Down Expand Up @@ -118,6 +122,12 @@ if(NOT (ACL_VERSION VERSION_GREATER "21.05"))
list(APPEND ACL_BUILD_FLAGS "internal_only=0")
endif()

# Enable REPROC++
set(REPROC++ ON)

# Numa
option(LIBNUMA_ENABLE "Enable libnuma usage" OFF)

#
# Dependencies
#
Expand All @@ -141,8 +151,18 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
GIT_TAG ${TRITON_BACKEND_REPO_TAG}
GIT_SHALLOW ON)
FetchContent_Declare(
tensorpipe
GIT_REPOSITORY https://github.com/pytorch/tensorpipe.git
GIT_TAG bb1473a4b38b18268e8693044afdb8635bc8351b
GIT_SHALLOW ON)
FetchContent_Declare(
reproc
GIT_REPOSITORY https://github.com/DaanDeMeyer/reproc
GIT_TAG v14.2.4
GIT_SHALLOW ON)

set(MAKE_AVAILABLE_LIST repo-common repo-core repo-backend)
set(MAKE_AVAILABLE_LIST repo-common repo-core repo-backend tensorpipe reproc)

if(NOT TFLITE_BAZEL_BUILD)
FetchContent_Declare(
Expand All @@ -169,6 +189,19 @@ configure_file(src/libtriton_armnn_tflite.ldscript

include(ExternalProject)

# Handle hwloc
# Build hwloc from source (used by the backend for CPU topology discovery /
# thread pinning). Installed into the default <prefix>, i.e.
# ${CMAKE_BINARY_DIR}/hwloc-prefix, which the link/install steps below reference.
#
# Fixes in this block:
#  - ExternalProject commands are NOT run through a shell, so `&&` chaining
#    does not work; use a repeated COMMAND keyword instead.
#  - The $<IF:...> generator expression was missing its closing ">", and the
#    quoted "1"/"0" arms would have passed literal quote characters to
#    ./configure.
ExternalProject_Add(
  hwloc
  GIT_REPOSITORY https://github.com/open-mpi/hwloc
  GIT_TAG hwloc-2.8.0
  GIT_SHALLOW ON
  SOURCE_DIR ${CMAKE_BINARY_DIR}/hwloc
  BINARY_DIR ${CMAKE_BINARY_DIR}/hwloc
  CONFIGURE_COMMAND ./autogen.sh
  COMMAND ./configure --prefix=<INSTALL_DIR> --enable-debug=$<IF:$<CONFIG:Debug>,1,0>
  BUILD_COMMAND make -j$(nproc)
  UPDATE_COMMAND ""
  INSTALL_COMMAND make install)

set(TFLITE_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/external/tensorflow_lite)

if(TFLITE_BAZEL_BUILD)
Expand Down Expand Up @@ -335,58 +368,187 @@ if (PAPI_PROFILING_ENABLE)
BUILD_COMMAND make -j$(nproc)
UPDATE_COMMAND ""
INSTALL_COMMAND make install
TEST_COMMAND make test
)
TEST_COMMAND make test)
endif()

#
# Handle libs for TFLite Backend
# Handle libs for Model Instance standalone executable
#

set(BACKEND_SRCS src/tflite.cc src/tflite_utils.cc src/tflite_utils.h)

set(MODEL_INSTANCE_SRCS
src/model_instance/model_instance_main.cc
src/model_instance/model_instance.cc
src/model_instance/model_instance.h
src/model_instance/model_instance_utils.h)
if(PAPI_PROFILING_ENABLE)
list(APPEND BACKEND_SRCS src/papi_profiler.cc)
list(APPEND MODEL_INSTANCE_SRCS src/model_instance/papi_profiler.cc)
endif()

add_library(triton-armnn-tflite-backend SHARED ${BACKEND_SRCS})

add_executable(model_instance ${MODEL_INSTANCE_SRCS})

set(MODEL_INSTANCE_LINK_LIBS
tensorpipe
triton-core-serverstub
triton-backend-utils)

# Handle discovery of libnuma
if(LIBNUMA_ENABLE)
find_package(Numa)
if(NUMA_FOUND)
# Here we just make numa available to all of our targets
link_directories(${NUMA_LIBRARY_DIR})
list(APPEND CMAKE_REQUIRED_LIBRARIES numa)
list(APPEND CMAKE_REQUIRED_INCLUDES ${NUMA_INCLUDE_DIR})
list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${NUMA_LIBRARY_DIR}")
check_symbol_exists(numa_node_of_cpu "numa.h" NUMA_V2)
if(NUMA_V2)
add_definitions(-DHAVE_LIBNUMA)
message(STATUS "libnuma found, building with support for NUMA nodes")
list(APPEND MODEL_INSTANCE_LINK_LIBS numa)
include_directories(SYSTEM ${NUMA_INCLUDE_DIR})
else()
message(FATAL_ERROR "libnuma not found, but was requested via option LIBNUMA_ENABLE")
endif()
endif()
mark_as_advanced(NUMA_FOUND)
endif(LIBNUMA_ENABLE)

set(MODEL_INSTANCE_INCLUDE_DIRS
${CMAKE_CURRENT_SOURCE_DIR}/src
${TENSORFLOW_ROOT} # for tflite headers
)
if(ARMNN_DELEGATE_ENABLE)
add_dependencies(triton-armnn-tflite-backend armnn)
add_dependencies(model_instance armnn)
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS
${ARMNN_LOCATION}/include # for armnn headers
${ARMNN_LOCATION}/src/armnn/delegate/include # for delegate headers
)
# As per https://review.mlplatform.org/c/ml/armnn/+/7327
if(ARMNN_VERSION VERSION_GREATER_EQUAL "22.05")
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS ${ARMNN_LOCATION}/src/armnn/profiling)
endif()
target_compile_definitions(model_instance PRIVATE ARMNN_DELEGATE_ENABLE=1)
# Link the armnn lib
target_link_libraries(
model_instance PRIVATE "-L${ARMNN_LOCATION}/lib" -larmnn -larmnnDelegate)
endif()

if(PAPI_PROFILING_ENABLE)
add_dependencies(triton-armnn-tflite-backend papi)
add_dependencies(model_instance papi)
target_compile_definitions(
triton-armnn-tflite-backend
model_instance
PRIVATE PAPI_PROFILING_ENABLE=1
)
target_include_directories(triton-armnn-tflite-backend PRIVATE ${CMAKE_BINARY_DIR}/papi-prefix/include)
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/papi-prefix/include)

# Note that linking the STATIC papi library results in a segfault on call to PAPI_library_init, use shared lib
target_link_libraries(triton-armnn-tflite-backend PRIVATE ${CMAKE_BINARY_DIR}/papi-prefix/lib/libpapi.so)
target_link_libraries(model_instance PRIVATE ${CMAKE_BINARY_DIR}/papi-prefix/lib/libpapi.so)
endif()

if(LIBNUMA_ENABLE)
target_compile_definitions(
model_instance
PRIVATE LIBNUMA_ENABLE=1
)
endif()

if(TFLITE_BAZEL_BUILD)
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS
${TENSORFLOW_ROOT}/bazel-tensorflow-lite/external/flatbuffers/include)
# Link the tensorflow lite library from bazel tfile build
target_link_libraries(
model_instance
PRIVATE "-L${TFLITE_LOCATION}/src/tensorflow-lite/bazel-bin/tensorflow/lite"
-ltensorflowlite)
else()
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS
${TFLITE_LIB_ROOT}/flatbuffers/include)
list(APPEND MODEL_INSTANCE_LINK_LIBS tensorflow-lite)
endif()

target_include_directories(model_instance PRIVATE ${MODEL_INSTANCE_INCLUDE_DIRS})
target_link_libraries(model_instance PRIVATE ${MODEL_INSTANCE_LINK_LIBS})

target_compile_features(model_instance PRIVATE cxx_std_11)
target_compile_options(
model_instance
PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall
-Wextra
-Wno-unused-parameter
-Wno-type-limits
-Wno-comment
-Werror>)

set_target_properties(
model_instance
PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME model_instance
SKIP_BUILD_RPATH TRUE
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH_USE_LINK_PATH FALSE
INSTALL_RPATH "$\{ORIGIN\}"
LINK_FLAGS
"-Wl,--no-as-needed")

#
# Handle libs for TFLite Backend
#

set(BACKEND_SRCS
src/tflite.cc
src/tflite_utils.cc
src/tflite_utils.h)

add_library(triton-armnn-tflite-backend SHARED ${BACKEND_SRCS})

add_library(TritonArmNNTFLiteBackend::triton-armnn-tflite-backend ALIAS
triton-armnn-tflite-backend)

set(BACKEND_INCLUDE_DIRS
${CMAKE_CURRENT_SOURCE_DIR}/src ${TENSORFLOW_ROOT} # for tflite headers
${ARMNN_LOCATION}/include # for armnn headers
${ARMNN_LOCATION}/src/armnn/delegate/include # for delegate headers
)

# As per https://review.mlplatform.org/c/ml/armnn/+/7327
if(ARMNN_VERSION VERSION_GREATER_EQUAL "22.05")
list(APPEND BACKEND_INCLUDE_DIRS ${ARMNN_LOCATION}/src/armnn/profiling)
endif()
set(BACKEND_LINK_LIBS
triton-core-serverapi triton-core-backendapi triton-core-serverstub
triton-backend-utils tensorpipe reproc++ ${CMAKE_DL_LIBS})

if(TFLITE_BAZEL_BUILD)
list(APPEND BACKEND_INCLUDE_DIRS
${TENSORFLOW_ROOT}/bazel-tensorflow-lite/external/flatbuffers/include)
# Link the tensorflow lite library from bazel tfile build
target_link_libraries(
triton-armnn-tflite-backend
PRIVATE "-L${TFLITE_LOCATION}/src/tensorflow-lite/bazel-bin/tensorflow/lite"
-ltensorflowlite)
else()
list(APPEND BACKEND_INCLUDE_DIRS
${TFLITE_LIB_ROOT}/flatbuffers/include)
list(APPEND BACKEND_LINK_LIBS tensorflow-lite)
endif()

add_dependencies(triton-armnn-tflite-backend hwloc)
list(APPEND BACKEND_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/hwloc-prefix/include)
target_link_libraries(triton-armnn-tflite-backend PRIVATE ${CMAKE_BINARY_DIR}/hwloc-prefix/lib/libhwloc.so)

if(ARMNN_DELEGATE_ENABLE)
target_compile_definitions(triton-armnn-tflite-backend PRIVATE ARMNN_DELEGATE_ENABLE=1)
endif()

if(PAPI_PROFILING_ENABLE)
target_compile_definitions(
triton-armnn-tflite-backend
PRIVATE PAPI_PROFILING_ENABLE=1
)
endif()

if(LIBNUMA_ENABLE)
target_compile_definitions(
triton-armnn-tflite-backend
PRIVATE LIBNUMA_ENABLE=1
)
endif()

target_include_directories(triton-armnn-tflite-backend
Expand All @@ -404,12 +566,6 @@ target_compile_options(
-Wno-comment
-Werror>)

# ARMNN_DELEGATE_ENABLE exposed in header so set PUBLIC
if(${ARMNN_DELEGATE_ENABLE})
target_compile_definitions(triton-armnn-tflite-backend
PUBLIC ARMNN_DELEGATE_ENABLE=1)
endif() # ARMNN_DELEGATE_ENABLE

set_target_properties(
triton-armnn-tflite-backend
PROPERTIES
Expand All @@ -423,41 +579,31 @@ set_target_properties(
LINK_FLAGS
"-Wl,--no-as-needed,--version-script libtriton_armnn_tflite.ldscript")

set(BACKEND_LINK_LIBS
triton-core-serverapi triton-core-backendapi triton-core-serverstub
triton-backend-utils ${CMAKE_DL_LIBS})

if(TFLITE_BAZEL_BUILD)
# Link the tensorflow lite library from bazel tfile build
target_link_libraries(
triton-armnn-tflite-backend
PRIVATE "-L${TFLITE_LOCATION}/src/tensorflow-lite/bazel-bin/tensorflow/lite"
-ltensorflowlite)
else()
list(APPEND BACKEND_LINK_LIBS tensorflow-lite)
endif()

target_link_libraries(triton-armnn-tflite-backend PRIVATE ${BACKEND_LINK_LIBS})

if(ARMNN_DELEGATE_ENABLE)
# Link the armnn lib
target_link_libraries(
triton-armnn-tflite-backend PRIVATE "-L${ARMNN_LOCATION}/lib" -larmnn
-larmnnDelegate)
endif()

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonArmNNTFLiteBackend)

install(
TARGETS model_instance
DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/armnn_tflite)

install(
TARGETS triton-armnn-tflite-backend
EXPORT triton-armnn-tflite-backend-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/armnn_tflite
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/armnn_tflite)

# Install hwloc libraries
install(
DIRECTORY ${CMAKE_BINARY_DIR}/hwloc-prefix/lib/
DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/armnn_tflite
FILES_MATCHING
PATTERN "*.so*")

if(ARMNN_DELEGATE_ENABLE)
# Install ArmNN libraries and license
install(
Expand Down
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,5 +194,21 @@ instance_group [
```

## Enabling PAPI events
This backend supports PAPI performance counter sampling. This is exposed through the PAPI High Level API. We support performance counter tracing at the tflite operator level using tflite tracing instrumentation. To enable this, when launching triton pass the flag `--backend-config=armnn_tflite,papi-events=PAPI_TOT_CYC,PAPI_LD_INS`. Internally, the events listed get set to the environment variable `PAPI_EVENTS` as per the PAPI High Level API documentation. Results of this will be written to a newly created `papi_hl_output` folder in the directory you launched the server from.
Internally, the events listed get set to the environment variable `PAPI_EVENTS` as per the PAPI High Level API documentation. Results of this will be written to a newly created `papi_hl_output` folder in the directory you launched the server from.
This backend supports PAPI performance counter sampling. We support performance counter tracing at the tflite operator level using tflite tracing instrumentation. To enable this, you can use the following in your model config:
```
parameters {
key: "papi_events"
value: {
string_value:"PAPI_TOT_CYC,PAPI_LD_INS"
}
}
parameters {
key: "papi_uncore_events"
value: {
string_value:"tx2_dmc0::UNC_DMC_READS:u:cpu=0"
}
}
```
`papi_events` is used for the per core events such as total load instructions, and can be tracked at the thread level, `papi_uncore_events` are uncore events which are tracked at the socket level such as userspace DRAM reads for socket 0 in the example above.

Internally, the events listed get set to the environment variables `PAPI_EVENTS` and `PAPI_UNCORE_EVENTS`. Results of this will be written to a newly created file `counters_*.csv` file for you to use as you wish.
Loading