-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
539 additions
and
388 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,45 @@ | ||
cmake_minimum_required(VERSION 2.6) | ||
cmake_minimum_required(VERSION 3.14) | ||
|
||
project(alexnet) | ||
project( | ||
alexnet | ||
VERSION 0.1 | ||
LANGUAGES C CXX CUDA) | ||
|
||
add_definitions(-std=c++11) | ||
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) | ||
set(CMAKE_CUDA_ARCHITECTURES | ||
60 | ||
70 | ||
72 | ||
75 | ||
80 | ||
86 | ||
89) | ||
endif() | ||
|
||
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) | ||
set(CMAKE_CXX_STANDARD 11) | ||
set(CMAKE_BUILD_TYPE Debug) | ||
set(CMAKE_CXX_STANDARD 17) | ||
set(CMAKE_CXX_STANDARD_REQUIRED ON) | ||
set(CMAKE_CUDA_STANDARD 17) | ||
set(CMAKE_CUDA_STANDARD_REQUIRED ON) | ||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||
set(CMAKE_INCLUDE_CURRENT_DIR TRUE) | ||
set(CMAKE_BUILD_TYPE | ||
"Debug" | ||
CACHE STRING "Build type for this project" FORCE) | ||
|
||
include_directories(${PROJECT_SOURCE_DIR}/include) | ||
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different | ||
# cuda | ||
include_directories(/usr/local/cuda/include) | ||
link_directories(/usr/local/cuda/lib64) | ||
# tensorrt | ||
include_directories(/usr/include/x86_64-linux-gnu/) | ||
link_directories(/usr/lib/x86_64-linux-gnu/) | ||
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cudaruntime library" OFF) | ||
|
||
add_executable(alexnet ${PROJECT_SOURCE_DIR}/alex.cpp) | ||
target_link_libraries(alexnet nvinfer) | ||
target_link_libraries(alexnet cudart) | ||
find_package(Threads REQUIRED) | ||
find_package(CUDAToolkit REQUIRED) | ||
|
||
add_definitions(-O2 -pthread) | ||
if(NOT TARGET TensorRT::TensorRT) | ||
include(FindTensorRT.cmake) | ||
else() | ||
message("TensorRT has been found, skipping for ${PROJECT_NAME}") | ||
endif() | ||
|
||
add_executable(${PROJECT_NAME} alex.cpp) | ||
|
||
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR}/include) | ||
|
||
target_link_libraries(${PROJECT_NAME} PUBLIC Threads::Threads m | ||
TensorRT::TensorRT CUDA::cudart) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
cmake_minimum_required(VERSION 3.17.0) | ||
|
||
set(TRT_VERSION | ||
$ENV{TRT_VERSION} | ||
CACHE STRING | ||
"TensorRT version, e.g. \"8.6.1.6\" or \"8.6.1.6+cuda12.0.1.011\"") | ||
|
||
# find TensorRT include folder | ||
if(NOT TensorRT_INCLUDE_DIR) | ||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") | ||
set(TensorRT_INCLUDE_DIR | ||
"/usr/local/cuda/targets/aarch64-linux/include" | ||
CACHE PATH "TensorRT_INCLUDE_DIR") | ||
else() | ||
set(TensorRT_INCLUDE_DIR | ||
"/usr/include/x86_64-linux-gnu" | ||
CACHE PATH "TensorRT_INCLUDE_DIR") | ||
endif() | ||
message(STATUS "TensorRT: ${TensorRT_INCLUDE_DIR}") | ||
endif() | ||
|
||
# find TensorRT library folder | ||
if(NOT TensorRT_LIBRARY_DIR) | ||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") | ||
set(TensorRT_LIBRARY_DIR | ||
"/usr/lib/aarch64-linux-gnu/tegra" | ||
CACHE PATH "TensorRT_LIBRARY_DIR") | ||
else() | ||
set(TensorRT_LIBRARY_DIR | ||
"/usr/include/x86_64-linux-gnu" | ||
CACHE PATH "TensorRT_LIBRARY_DIR") | ||
endif() | ||
message(STATUS "TensorRT: ${TensorRT_LIBRARY_DIR}") | ||
endif() | ||
|
||
set(TensorRT_LIBRARIES) | ||
|
||
message(STATUS "Found TensorRT lib: ${TensorRT_LIBRARIES}") | ||
|
||
# process for different TensorRT version | ||
if(DEFINED TRT_VERSION AND NOT TRT_VERSION STREQUAL "") | ||
string(REGEX MATCH "([0-9]+)" _match ${TRT_VERSION}) | ||
set(TRT_MAJOR_VERSION "${_match}") | ||
set(_modules nvinfer nvinfer_plugin) | ||
|
||
if(TRT_MAJOR_VERSION GREATER_EQUAL 8) | ||
list(APPEND _modules nvinfer_vc_plugin nvinfer_dispatch nvinfer_lean) | ||
endif() | ||
else() | ||
message(FATAL_ERROR "Please set a environment variable \"TRT_VERSION\"") | ||
endif() | ||
|
||
# find and add all modules of TensorRT into list | ||
foreach(lib IN LISTS _modules) | ||
find_library( | ||
TensorRT_${lib}_LIBRARY | ||
NAMES ${lib} | ||
HINTS ${TensorRT_LIBRARY_DIR}) | ||
list(APPEND TensorRT_LIBRARIES ${TensorRT_${lib}_LIBRARY}) | ||
endforeach() | ||
|
||
# make the "TensorRT target" | ||
add_library(TensorRT IMPORTED INTERFACE) | ||
add_library(TensorRT::TensorRT ALIAS TensorRT) | ||
target_link_libraries(TensorRT INTERFACE ${TensorRT_LIBRARIES}) | ||
|
||
set_target_properties( | ||
TensorRT | ||
PROPERTIES C_STANDARD 17 | ||
CXX_STANDARD 17 | ||
POSITION_INDEPENDENT_CODE ON | ||
SKIP_BUILD_RPATH TRUE | ||
BUILD_WITH_INSTALL_RPATH TRUE | ||
INSTALL_RPATH "$\{ORIGIN\}" | ||
INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIR}") | ||
|
||
unset(TRT_MAJOR_VERSION) | ||
unset(_modules) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,67 @@ | ||
# alexnet | ||
# AlexNet | ||
|
||
AlexNet model architecture from the "One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper. | ||
## Introduction | ||
|
||
For the details, you can refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet) | ||
AlexNet model architecture comes from this paper: [One weird trick for parallelizing convolutional neural networks](https://arxiv.org/abs/1404.5997). To generate the `.wts` file, you can refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet). To check the PyTorch implementation of AlexNet, refer to [HERE](https://github.com/pytorch/vision/blob/main/torchvision/models/alexnet.py#L17) | ||
|
||
This alexnet is just several `conv-relu-pool` blocks followed by several `fc-relu`, nothing special. All layers can be implemented by tensorrt api, including `addConvolution`, `addActivation`, `addPooling`, `addFullyConnected`. | ||
AlexNet consists of 3 major parts: features, adaptive average pooling, and classifier: | ||
* features: just several stacked `CRP`(conv-relu-pool) and `CR` layers | ||
* adaptive average pooling: PyTorch can decide its inner parameters automatically, but we need to calculate them ourselves with the TensorRT API | ||
* classifier: just several `fc-relu` layers. All layers can be implemented by tensorrt api, including `addConvolution`, `addActivation`, `addPooling`, `addMatrixMultiply`, `addElementWise` etc. | ||
|
||
``` | ||
// 1. generate alexnet.wts from [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet) | ||
// 2. put alexnet.wts into tensorrtx/alexnet | ||
// 3. build and run | ||
cd tensorrtx/alexnet | ||
## Use AlexNet from PyTorch | ||
|
||
mkdir build | ||
We can use torchvision to load the pretrained alexnet model: | ||
|
||
cd build | ||
```python | ||
alexnet = torchvision.models.alexnet(pretrained=True) | ||
``` | ||
|
||
cmake .. | ||
The model structure is: | ||
|
||
```txt | ||
AlexNet( | ||
(features): Sequential( | ||
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)) | ||
(1): ReLU(inplace=True) | ||
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) | ||
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) | ||
(4): ReLU(inplace=True) | ||
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) | ||
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | ||
(7): ReLU(inplace=True) | ||
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | ||
(9): ReLU(inplace=True) | ||
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | ||
(11): ReLU(inplace=True) | ||
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) | ||
) | ||
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6)) | ||
(classifier): Sequential( | ||
(0): Dropout(p=0.5, inplace=False) | ||
(1): Linear(in_features=9216, out_features=4096, bias=True) | ||
(2): ReLU(inplace=True) | ||
(3): Dropout(p=0.5, inplace=False) | ||
(4): Linear(in_features=4096, out_features=4096, bias=True) | ||
(5): ReLU(inplace=True) | ||
(6): Linear(in_features=4096, out_features=1000, bias=True) | ||
) | ||
) | ||
``` | ||
|
||
make | ||
## FAQ | ||
|
||
sudo ./alexnet -s // serialize model to plan file i.e. 'alexnet.engine' | ||
### How to align the output with Pytorch? | ||
|
||
sudo ./alexnet -d // deserialize plan file and run inference | ||
If your output is different from PyTorch's, you have to check which TensorRT API call or which part of your code causes this. A simple solution would be to check the `.engine` output part by part, e.g., you can set an early layer of AlexNet as the output: | ||
|
||
// 4. see if the output is same as pytorchx/alexnet | ||
```c++ | ||
fc3_1->getOutput(0)->setName(OUTPUT_NAME); | ||
network->markOutput(*pool3->getOutput(0)); // original is: "*fc3_1->getOutput(0)" | ||
``` | ||
For this line of code, I use the output from the "feature" part of AlexNet, ignoring the rest of the model. Then, don't forget to change the `OUTPUT_SIZE` macro at the top of the file. Lastly, rebuild the `.engine` file to apply the changes. | ||
You can sum up all outputs from the C++ code and compare the result with the PyTorch output; in PyTorch, you can do this with `torch.sum(x)` at debug time. The ideal deviation between the two sums would be in $[10^{-2}, 10^{-1}]$; for this example, since the number of output elements of "features" is $256 \times 6 \times 6$ (batch = 1), the per-element error would roughly be $10^{-4}$. | ||
Note: This is a quick check; for a more accurate check, you have to save the output tensors into files and compare them value by value, but needing to do so is rare. |
Oops, something went wrong.