Skip to content

Commit

Permalink
[Refactor] AlexNet
Browse files Browse the repository at this point in the history
  • Loading branch information
zgjja committed Oct 28, 2024
1 parent 33d54ea commit ab35e3e
Show file tree
Hide file tree
Showing 7 changed files with 539 additions and 388 deletions.
56 changes: 38 additions & 18 deletions alexnet/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,45 @@
cmake_minimum_required(VERSION 2.6)
cmake_minimum_required(VERSION 3.14)

project(alexnet)
project(
alexnet
VERSION 0.1
LANGUAGES C CXX CUDA)

add_definitions(-std=c++11)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES
60
70
72
75
80
86
89)
endif()

option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
set(CMAKE_BUILD_TYPE
"Debug"
CACHE STRING "Build type for this project" FORCE)

include_directories(${PROJECT_SOURCE_DIR}/include)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/usr/include/x86_64-linux-gnu/)
link_directories(/usr/lib/x86_64-linux-gnu/)
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cudaruntime library" OFF)

add_executable(alexnet ${PROJECT_SOURCE_DIR}/alex.cpp)
target_link_libraries(alexnet nvinfer)
target_link_libraries(alexnet cudart)
find_package(Threads REQUIRED)
find_package(CUDAToolkit REQUIRED)

add_definitions(-O2 -pthread)
if(NOT TARGET TensorRT::TensorRT)
include(FindTensorRT.cmake)
else()
message("TensorRT has been found, skipping for ${PROJECT_NAME}")
endif()

add_executable(${PROJECT_NAME} alex.cpp)

target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR}/include)

target_link_libraries(${PROJECT_NAME} PUBLIC Threads::Threads m
TensorRT::TensorRT CUDA::cudart)
78 changes: 78 additions & 0 deletions alexnet/FindTensorRT.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# FindTensorRT.cmake — locate the TensorRT headers and libraries and expose
# them through an imported TensorRT::TensorRT interface target.
#
# BUGFIX: this module creates an ALIAS of an IMPORTED target (see the
# add_library(TensorRT::TensorRT ALIAS TensorRT) call below), which is only
# supported since CMake 3.18; 3.17.0 would fail at configure time.
cmake_minimum_required(VERSION 3.18)

# TensorRT version to search for, read from the TRT_VERSION environment
# variable at configure time. The major version selects which module
# libraries are linked.
set(TRT_VERSION
    $ENV{TRT_VERSION}
    CACHE STRING
          "TensorRT version, e.g. \"8.6.1.6\" or \"8.6.1.6+cuda12.0.1.011\"")

# Locate the TensorRT header directory. A user-provided cache value for
# TensorRT_INCLUDE_DIR is honoured; otherwise fall back to the distro
# default, which differs between Jetson (aarch64) and x86_64 hosts.
if(NOT TensorRT_INCLUDE_DIR)
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(_trt_default_include "/usr/local/cuda/targets/aarch64-linux/include")
  else()
    set(_trt_default_include "/usr/include/x86_64-linux-gnu")
  endif()
  set(TensorRT_INCLUDE_DIR
      "${_trt_default_include}"
      CACHE PATH "TensorRT_INCLUDE_DIR")
  unset(_trt_default_include)
  message(STATUS "TensorRT: ${TensorRT_INCLUDE_DIR}")
endif()

# Locate the TensorRT library directory. A user-provided cache value for
# TensorRT_LIBRARY_DIR is honoured; otherwise fall back to the distro
# default for the current architecture.
if(NOT TensorRT_LIBRARY_DIR)
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(TensorRT_LIBRARY_DIR
        "/usr/lib/aarch64-linux-gnu/tegra"
        CACHE PATH "TensorRT_LIBRARY_DIR")
  else()
    # BUGFIX: this previously pointed at /usr/include/x86_64-linux-gnu,
    # which is the *header* directory (same path as TensorRT_INCLUDE_DIR
    # above); the shared libraries live under /usr/lib on Debian/Ubuntu.
    set(TensorRT_LIBRARY_DIR
        "/usr/lib/x86_64-linux-gnu"
        CACHE PATH "TensorRT_LIBRARY_DIR")
  endif()
  message(STATUS "TensorRT: ${TensorRT_LIBRARY_DIR}")
endif()

# Full list of resolved TensorRT library paths, populated below.
set(TensorRT_LIBRARIES)

# Derive the major version from TRT_VERSION and choose the set of module
# libraries to resolve.
if(DEFINED TRT_VERSION AND NOT TRT_VERSION STREQUAL "")
  # Quote the expansion: an unquoted value containing ';' would be split
  # into multiple arguments by string().
  string(REGEX MATCH "([0-9]+)" _match "${TRT_VERSION}")
  set(TRT_MAJOR_VERSION "${_match}")
  set(_modules nvinfer nvinfer_plugin)

  # TensorRT 8+ ships additional runtime libraries.
  if(TRT_MAJOR_VERSION GREATER_EQUAL 8)
    list(APPEND _modules nvinfer_vc_plugin nvinfer_dispatch nvinfer_lean)
  endif()
else()
  message(FATAL_ERROR "Please set an environment variable \"TRT_VERSION\"")
endif()

# Resolve every requested module and collect it into TensorRT_LIBRARIES.
foreach(lib IN LISTS _modules)
  find_library(
    TensorRT_${lib}_LIBRARY
    NAMES ${lib}
    HINTS ${TensorRT_LIBRARY_DIR})
  # Fail loudly at configure time instead of silently appending a
  # "-NOTFOUND" entry that only breaks later at link time.
  if(NOT TensorRT_${lib}_LIBRARY)
    message(
      FATAL_ERROR
        "TensorRT module \"${lib}\" not found (searched ${TensorRT_LIBRARY_DIR})"
    )
  endif()
  list(APPEND TensorRT_LIBRARIES ${TensorRT_${lib}_LIBRARY})
endforeach()

# BUGFIX: this status message used to be printed right after the list was
# initialised (always empty); report it after the list is populated.
message(STATUS "Found TensorRT lib: ${TensorRT_LIBRARIES}")

# Bundle the resolved libraries and the include directory into an imported
# interface target that consumers link as TensorRT::TensorRT.
# Keyword order follows the documented signature: <name> INTERFACE IMPORTED.
add_library(TensorRT INTERFACE IMPORTED)
# NOTE: aliasing an IMPORTED target requires CMake >= 3.18 (enforced by the
# cmake_minimum_required at the top of this module).
add_library(TensorRT::TensorRT ALIAS TensorRT)
target_link_libraries(TensorRT INTERFACE ${TensorRT_LIBRARIES})

set_target_properties(
  TensorRT
  PROPERTIES C_STANDARD 17
             CXX_STANDARD 17
             POSITION_INDEPENDENT_CODE ON
             SKIP_BUILD_RPATH TRUE
             BUILD_WITH_INSTALL_RPATH TRUE
             # BUGFIX: "$\{ORIGIN\}" embedded a literal "${ORIGIN}" string in
             # the rpath; the ELF dynamic loader only recognises the $ORIGIN
             # token. CMake does not expand "$ORIGIN" (no braces), so it can
             # be written plainly.
             INSTALL_RPATH "$ORIGIN"
             INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIR}")

# Clean up module-internal temporaries so they do not leak into the
# including scope.
unset(TRT_MAJOR_VERSION)
unset(_modules)
72 changes: 53 additions & 19 deletions alexnet/README.md
Original file line number Diff line number Diff line change
@@ -1,33 +1,67 @@
# alexnet
# AlexNet

AlexNet model architecture from the "One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
## Introduction

For the details, you can refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet)
AlexNet model architecture comes from this paper: [One weird trick for parallelizing convolutional neural networks](https://arxiv.org/abs/1404.5997). To generate `.wts` file, you can refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet). To check the pytorch implementation of AlexNet, refer to [HERE](https://github.com/pytorch/vision/blob/main/torchvision/models/alexnet.py#L17)

This alexnet is just several `conv-relu-pool` blocks followed by several `fc-relu`, nothing special. All layers can be implemented by tensorrt api, including `addConvolution`, `addActivation`, `addPooling`, `addFullyConnected`.
AlexNet consists of 3 major parts: features, adaptive average pooling, and classifier:
* features: just several stacked `CRP`(conv-relu-pool) and `CR` layers
* adaptive average pooling: PyTorch can derive its internal parameters automatically, but we need to calculate them ourselves when using the TensorRT API
* classifier: just several `fc-relu` layers. All layers can be implemented by tensorrt api, including `addConvolution`, `addActivation`, `addPooling`, `addMatrixMultiply`, `addElementWise` etc.

```
// 1. generate alexnet.wts from [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet)
// 2. put alexnet.wts into tensorrtx/alexnet
// 3. build and run
cd tensorrtx/alexnet
## Use AlexNet from PyTorch

mkdir build
We can use torchvision to load the pretrained alexnet model:

cd build
```python
alexnet = torchvision.models.alexnet(pretrained=True)
```

cmake ..
The model structure is:

```txt
AlexNet(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
(classifier): Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
)
```

make
## FAQ

sudo ./alexnet -s // serialize model to plan file i.e. 'alexnet.engine'
### How to align the output with Pytorch?

sudo ./alexnet -d // deserialize plan file and run inference
If your output is different from PyTorch's, you have to check which TensorRT API call or which part of your code causes this. A simple solution would be to check the `.engine` output part by part, e.g., you can set an early layer of AlexNet as the output:

// 4. see if the output is same as pytorchx/alexnet
```c++
fc3_1->getOutput(0)->setName(OUTPUT_NAME);
network->markOutput(*pool3->getOutput(0)); // original is: "*fc3_1->getOutput(0)"
```
With this line of code, I use the output from the "features" part of AlexNet, ignoring the rest of the model. Then, don't forget to change the `OUTPUT_SIZE` macro at the top of the file; lastly, rebuild the `.engine` file to apply the changes.
You can sum up all outputs from the C++ code and compare the sum with the PyTorch output; in PyTorch, you can do this with `torch.sum(x)` while debugging. The ideal deviation between the two sums would be in $[10^{-2}, 10^{-1}]$; for this example, since the number of output elements of "features" is $256 \times 6 \times 6$ (batch = 1), the final per-element error would roughly be $10^{-4}$.
Note: this is only a quick check; for a more accurate comparison, you have to save the output tensors to a file and compare them value by value, but that is rarely necessary.
Loading

0 comments on commit ab35e3e

Please sign in to comment.