Skip to content

Feature/multiprocess #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 33 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
d17efa7
Working multiprocess solution
jishminor May 25, 2023
31384e5
Support papi op profiling per model
jishminor May 30, 2023
5ac8615
Use cma channel first, and improve error handling
jishminor May 30, 2023
6344cfd
Make parent process handle the connection listen
jishminor May 30, 2023
53adc1a
Fix build.yml
jishminor May 31, 2023
64f6c81
Fix cleanup of child process
jishminor May 31, 2023
81eb50a
Handle model execution failure correctly
jishminor May 31, 2023
c1c81fd
Improve error handling
jishminor May 31, 2023
a94b986
Fix issue with MMap input tensors in models
jishminor Jun 2, 2023
7a6ab0c
Improve efficiency of model instance message passing
jishminor Jun 2, 2023
c7c5e59
Fix bug in model instance tensor allocation
jishminor Jun 6, 2023
20eeff4
Use papi low level api
jishminor Jun 7, 2023
35fb954
Fix csv generation script
jishminor Jun 8, 2023
06ec744
Append utc time to csv file name
jishminor Jun 8, 2023
0c87cbb
Wait for model load message before claiming model is ready
jishminor Jun 8, 2023
53acc26
Don't worry about papi hl
jishminor Jun 8, 2023
8b0e28e
Add support for model NUMA policies
jishminor Jun 9, 2023
adf2c0b
Fix non numa build
jishminor Jun 12, 2023
40c32d1
Add time to profiling data by default
jishminor Jun 12, 2023
c81b65c
Explicitly call terminate on child process
jishminor Jun 12, 2023
a939d81
Put timeout on cleanup
jishminor Jun 12, 2023
d90d739
Fix exit handling in child process
jishminor Jun 12, 2023
c08cd3b
Add support for uncore papi events
jishminor Jul 27, 2023
7bfade4
Simplify perf counter infra
jishminor Jul 28, 2023
0ba5134
Add sample id to csv file, and fix csv gen
jishminor Jul 28, 2023
9737271
Only keep one copy of op timings in csv
jishminor Aug 2, 2023
9096e59
Add function to get list of avail cpus per socket
jishminor Aug 17, 2023
3caac11
Fix bug in validating arbitrary batch sizes
jishminor Aug 18, 2023
aba3fa3
Implement thread pinning feature
jishminor Aug 22, 2023
7367f6b
Give back threads to avail cpus
jishminor Aug 22, 2023
0b47640
Fix issue with model unloading
jishminor Aug 22, 2023
5c7ff58
Add flag to control thread pinning
jishminor Aug 23, 2023
6e875e4
Fix thread pinning strat
jishminor Aug 23, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ jobs:
-DTRITON_BACKEND_REPO_TAG=${{env.TRITON_REPO_TAG}} \
-DTRITON_CORE_REPO_TAG=${{env.TRITON_REPO_TAG}} \
-DTRITON_COMMON_REPO_TAG=${{env.TRITON_REPO_TAG}} \
-PAPI_PROFILING_ENABLE=ON \
-DPAPI_PROFILING_ENABLE=ON \
-DTRITON_ENABLE_MALI_GPU=${{env.TRITON_ENABLE_MALI_GPU}} \
-DTFLITE_ENABLE_RUY=${{env.TFLITE_ENABLE_RUY}} \
-DTFLITE_BAZEL_BUILD=${{env.TFLITE_BAZEL_BUILD}} \
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@
/.devcontainer
/**/triton_qa_models
/**/armnn_tflite_backend_triton_model_repo.tar.gz
**/papi_hl_output*
*.csv

240 changes: 193 additions & 47 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

set(CMAKE_CXX_STANDARD 17)

SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")

set(TARGET_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR})

# Triton Options
Expand Down Expand Up @@ -118,6 +122,12 @@ if(NOT (ACL_VERSION VERSION_GREATER "21.05"))
list(APPEND ACL_BUILD_FLAGS "internal_only=0")
endif()

# Enable REPROC++
set(REPROC++ ON)

# Numa
option(LIBNUMA_ENABLE "Enable libnuma usage" OFF)

#
# Dependencies
#
Expand All @@ -141,8 +151,18 @@ FetchContent_Declare(
GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
GIT_TAG ${TRITON_BACKEND_REPO_TAG}
GIT_SHALLOW ON)
FetchContent_Declare(
tensorpipe
GIT_REPOSITORY https://github.com/pytorch/tensorpipe.git
GIT_TAG bb1473a4b38b18268e8693044afdb8635bc8351b
GIT_SHALLOW ON)
FetchContent_Declare(
reproc
GIT_REPOSITORY https://github.com/DaanDeMeyer/reproc
GIT_TAG v14.2.4
GIT_SHALLOW ON)

set(MAKE_AVAILABLE_LIST repo-common repo-core repo-backend)
set(MAKE_AVAILABLE_LIST repo-common repo-core repo-backend tensorpipe reproc)

if(NOT TFLITE_BAZEL_BUILD)
FetchContent_Declare(
Expand All @@ -169,6 +189,19 @@ configure_file(src/libtriton_armnn_tflite.ldscript

include(ExternalProject)

# Handle hwloc
# Build hwloc from source (used by the backend for CPU topology discovery /
# thread pinning). Installed into the default <prefix>, i.e.
# ${CMAKE_BINARY_DIR}/hwloc-prefix, which the link/install steps below reference.
#
# Fixes in this block:
#  - ExternalProject commands are NOT run through a shell, so `&&` chaining
#    does not work; use a repeated COMMAND keyword instead.
#  - The $<IF:...> generator expression was missing its closing ">", and the
#    quoted "1"/"0" arms would have passed literal quote characters to
#    ./configure.
ExternalProject_Add(
  hwloc
  GIT_REPOSITORY https://github.com/open-mpi/hwloc
  GIT_TAG hwloc-2.8.0
  GIT_SHALLOW ON
  SOURCE_DIR ${CMAKE_BINARY_DIR}/hwloc
  BINARY_DIR ${CMAKE_BINARY_DIR}/hwloc
  CONFIGURE_COMMAND ./autogen.sh
  COMMAND ./configure --prefix=<INSTALL_DIR> --enable-debug=$<IF:$<CONFIG:Debug>,1,0>
  BUILD_COMMAND make -j$(nproc)
  UPDATE_COMMAND ""
  INSTALL_COMMAND make install)

set(TFLITE_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/external/tensorflow_lite)

if(TFLITE_BAZEL_BUILD)
Expand Down Expand Up @@ -335,58 +368,187 @@ if (PAPI_PROFILING_ENABLE)
BUILD_COMMAND make -j$(nproc)
UPDATE_COMMAND ""
INSTALL_COMMAND make install
TEST_COMMAND make test
)
TEST_COMMAND make test)
endif()

#
# Handle libs for TFLite Backend
# Handle libs for Model Instance standalone executable
#

set(BACKEND_SRCS src/tflite.cc src/tflite_utils.cc src/tflite_utils.h)

set(MODEL_INSTANCE_SRCS
src/model_instance/model_instance_main.cc
src/model_instance/model_instance.cc
src/model_instance/model_instance.h
src/model_instance/model_instance_utils.h)
if(PAPI_PROFILING_ENABLE)
list(APPEND BACKEND_SRCS src/papi_profiler.cc)
list(APPEND MODEL_INSTANCE_SRCS src/model_instance/papi_profiler.cc)
endif()

add_library(triton-armnn-tflite-backend SHARED ${BACKEND_SRCS})

add_executable(model_instance ${MODEL_INSTANCE_SRCS})

set(MODEL_INSTANCE_LINK_LIBS
tensorpipe
triton-core-serverstub
triton-backend-utils)

# Handle discovery of libnuma
if(LIBNUMA_ENABLE)
find_package(Numa)
if(NUMA_FOUND)
# Here we just make numa available to all of our targets
link_directories(${NUMA_LIBRARY_DIR})
list(APPEND CMAKE_REQUIRED_LIBRARIES numa)
list(APPEND CMAKE_REQUIRED_INCLUDES ${NUMA_INCLUDE_DIR})
list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${NUMA_LIBRARY_DIR}")
check_symbol_exists(numa_node_of_cpu "numa.h" NUMA_V2)
if(NUMA_V2)
add_definitions(-DHAVE_LIBNUMA)
message(STATUS "libnuma found, building with support for NUMA nodes")
list(APPEND MODEL_INSTANCE_LINK_LIBS numa)
include_directories(SYSTEM ${NUMA_INCLUDE_DIR})
else()
message(FATAL_ERROR "libnuma not found, but was requested via option LIBNUMA_ENABLE")
endif()
endif()
mark_as_advanced(NUMA_FOUND)
endif(LIBNUMA_ENABLE)

set(MODEL_INSTANCE_INCLUDE_DIRS
${CMAKE_CURRENT_SOURCE_DIR}/src
${TENSORFLOW_ROOT} # for tflite headers
)
if(ARMNN_DELEGATE_ENABLE)
add_dependencies(triton-armnn-tflite-backend armnn)
add_dependencies(model_instance armnn)
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS
${ARMNN_LOCATION}/include # for armnn headers
${ARMNN_LOCATION}/src/armnn/delegate/include # for delegate headers
)
# As per https://review.mlplatform.org/c/ml/armnn/+/7327
if(ARMNN_VERSION VERSION_GREATER_EQUAL "22.05")
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS ${ARMNN_LOCATION}/src/armnn/profiling)
endif()
target_compile_definitions(model_instance PRIVATE ARMNN_DELEGATE_ENABLE=1)
# Link the armnn lib
target_link_libraries(
model_instance PRIVATE "-L${ARMNN_LOCATION}/lib" -larmnn -larmnnDelegate)
endif()

if(PAPI_PROFILING_ENABLE)
add_dependencies(triton-armnn-tflite-backend papi)
add_dependencies(model_instance papi)
target_compile_definitions(
triton-armnn-tflite-backend
model_instance
PRIVATE PAPI_PROFILING_ENABLE=1
)
target_include_directories(triton-armnn-tflite-backend PRIVATE ${CMAKE_BINARY_DIR}/papi-prefix/include)
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/papi-prefix/include)

# Note that linking the STATIC papi library results in a segfault on call to PAPI_library_init, use shared lib
target_link_libraries(triton-armnn-tflite-backend PRIVATE ${CMAKE_BINARY_DIR}/papi-prefix/lib/libpapi.so)
target_link_libraries(model_instance PRIVATE ${CMAKE_BINARY_DIR}/papi-prefix/lib/libpapi.so)
endif()

if(LIBNUMA_ENABLE)
target_compile_definitions(
model_instance
PRIVATE LIBNUMA_ENABLE=1
)
endif()

if(TFLITE_BAZEL_BUILD)
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS
${TENSORFLOW_ROOT}/bazel-tensorflow-lite/external/flatbuffers/include)
# Link the tensorflow lite library from bazel tfile build
target_link_libraries(
model_instance
PRIVATE "-L${TFLITE_LOCATION}/src/tensorflow-lite/bazel-bin/tensorflow/lite"
-ltensorflowlite)
else()
list(APPEND MODEL_INSTANCE_INCLUDE_DIRS
${TFLITE_LIB_ROOT}/flatbuffers/include)
list(APPEND MODEL_INSTANCE_LINK_LIBS tensorflow-lite)
endif()

target_include_directories(model_instance PRIVATE ${MODEL_INSTANCE_INCLUDE_DIRS})
target_link_libraries(model_instance PRIVATE ${MODEL_INSTANCE_LINK_LIBS})

target_compile_features(model_instance PRIVATE cxx_std_11)
target_compile_options(
model_instance
PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall
-Wextra
-Wno-unused-parameter
-Wno-type-limits
-Wno-comment
-Werror>)

set_target_properties(
model_instance
PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME model_instance
SKIP_BUILD_RPATH TRUE
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH_USE_LINK_PATH FALSE
INSTALL_RPATH "$\{ORIGIN\}"
LINK_FLAGS
"-Wl,--no-as-needed")

#
# Handle libs for TFLite Backend
#

set(BACKEND_SRCS
src/tflite.cc
src/tflite_utils.cc
src/tflite_utils.h)

add_library(triton-armnn-tflite-backend SHARED ${BACKEND_SRCS})

add_library(TritonArmNNTFLiteBackend::triton-armnn-tflite-backend ALIAS
triton-armnn-tflite-backend)

set(BACKEND_INCLUDE_DIRS
${CMAKE_CURRENT_SOURCE_DIR}/src ${TENSORFLOW_ROOT} # for tflite headers
${ARMNN_LOCATION}/include # for armnn headers
${ARMNN_LOCATION}/src/armnn/delegate/include # for delegate headers
)

# As per https://review.mlplatform.org/c/ml/armnn/+/7327
if(ARMNN_VERSION VERSION_GREATER_EQUAL "22.05")
list(APPEND BACKEND_INCLUDE_DIRS ${ARMNN_LOCATION}/src/armnn/profiling)
endif()
set(BACKEND_LINK_LIBS
triton-core-serverapi triton-core-backendapi triton-core-serverstub
triton-backend-utils tensorpipe reproc++ ${CMAKE_DL_LIBS})

if(TFLITE_BAZEL_BUILD)
list(APPEND BACKEND_INCLUDE_DIRS
${TENSORFLOW_ROOT}/bazel-tensorflow-lite/external/flatbuffers/include)
# Link the tensorflow lite library from bazel tfile build
target_link_libraries(
triton-armnn-tflite-backend
PRIVATE "-L${TFLITE_LOCATION}/src/tensorflow-lite/bazel-bin/tensorflow/lite"
-ltensorflowlite)
else()
list(APPEND BACKEND_INCLUDE_DIRS
${TFLITE_LIB_ROOT}/flatbuffers/include)
list(APPEND BACKEND_LINK_LIBS tensorflow-lite)
endif()

add_dependencies(triton-armnn-tflite-backend hwloc)
list(APPEND BACKEND_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/hwloc-prefix/include)
target_link_libraries(triton-armnn-tflite-backend PRIVATE ${CMAKE_BINARY_DIR}/hwloc-prefix/lib/libhwloc.so)

if(ARMNN_DELEGATE_ENABLE)
target_compile_definitions(triton-armnn-tflite-backend PRIVATE ARMNN_DELEGATE_ENABLE=1)
endif()

if(PAPI_PROFILING_ENABLE)
target_compile_definitions(
triton-armnn-tflite-backend
PRIVATE PAPI_PROFILING_ENABLE=1
)
endif()

if(LIBNUMA_ENABLE)
target_compile_definitions(
triton-armnn-tflite-backend
PRIVATE LIBNUMA_ENABLE=1
)
endif()

target_include_directories(triton-armnn-tflite-backend
Expand All @@ -404,12 +566,6 @@ target_compile_options(
-Wno-comment
-Werror>)

# ARMNN_DELEGATE_ENABLE exposed in header so set PUBLIC
if(${ARMNN_DELEGATE_ENABLE})
target_compile_definitions(triton-armnn-tflite-backend
PUBLIC ARMNN_DELEGATE_ENABLE=1)
endif() # ARMNN_DELEGATE_ENABLE

set_target_properties(
triton-armnn-tflite-backend
PROPERTIES
Expand All @@ -423,41 +579,31 @@ set_target_properties(
LINK_FLAGS
"-Wl,--no-as-needed,--version-script libtriton_armnn_tflite.ldscript")

set(BACKEND_LINK_LIBS
triton-core-serverapi triton-core-backendapi triton-core-serverstub
triton-backend-utils ${CMAKE_DL_LIBS})

if(TFLITE_BAZEL_BUILD)
# Link the tensorflow lite library from bazel tfile build
target_link_libraries(
triton-armnn-tflite-backend
PRIVATE "-L${TFLITE_LOCATION}/src/tensorflow-lite/bazel-bin/tensorflow/lite"
-ltensorflowlite)
else()
list(APPEND BACKEND_LINK_LIBS tensorflow-lite)
endif()

target_link_libraries(triton-armnn-tflite-backend PRIVATE ${BACKEND_LINK_LIBS})

if(ARMNN_DELEGATE_ENABLE)
# Link the armnn lib
target_link_libraries(
triton-armnn-tflite-backend PRIVATE "-L${ARMNN_LOCATION}/lib" -larmnn
-larmnnDelegate)
endif()

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonArmNNTFLiteBackend)

install(
TARGETS model_instance
DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/armnn_tflite)

install(
TARGETS triton-armnn-tflite-backend
EXPORT triton-armnn-tflite-backend-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/armnn_tflite
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/armnn_tflite)

# Install hwloc libraries
install(
DIRECTORY ${CMAKE_BINARY_DIR}/hwloc-prefix/lib/
DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/armnn_tflite
FILES_MATCHING
PATTERN "*.so*")

if(ARMNN_DELEGATE_ENABLE)
# Install ArmNN libraries and license
install(
Expand Down
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,5 +194,21 @@ instance_group [
```

## Enabling PAPI events
This backend supports PAPI performance counter sampling. This is exposed through the PAPI High Level API. We support performance counter tracing at the tflite operator level using tflite tracing instrumentation. To enable this, when launching triton pass the flag `--backend-config=armnn_tflite,papi-events=PAPI_TOT_CYC,PAPI_LD_INS`. Internally, the events listed get set to the environment variable `PAPI_EVENTS` as per the PAPI High Level API documentation. Results of this will be written to a newly created `papi_hl_output` folder in the directory you launched the server from.
Internally, the events listed get set to the environment variable `PAPI_EVENTS` as per the PAPI High Level API documentation. Results of this will be written to a newly created `papi_hl_output` folder in the directory you launched the server from.
This backend supports PAPI performance counter sampling. We support performance counter tracing at the tflite operator level using tflite tracing instrumentation. To enable this, you can use the following in your model config:
```
parameters {
key: "papi_events"
value: {
string_value:"PAPI_TOT_CYC,PAPI_LD_INS"
}
}
parameters {
key: "papi_uncore_events"
value: {
string_value:"tx2_dmc0::UNC_DMC_READS:u:cpu=0"
}
}
```
`papi_events` is used for the per core events such as total load instructions, and can be tracked at the thread level, `papi_uncore_events` are uncore events which are tracked at the socket level such as userspace DRAM reads for socket 0 in the example above.

Internally, the events listed get set to the environment variables `PAPI_EVENTS` and `PAPI_UNCORE_EVENTS`. Results of this will be written to a newly created file `counters_*.csv` file for you to use as you wish.
Loading