Merge pull request #190 from Kracozebr/master
Add deformable conv to repo
haotian-liu authored Oct 5, 2022
2 parents 7e7e624 + 3d9dda3 commit 3f423ed
Showing 18 changed files with 1,739 additions and 101 deletions.
49 changes: 36 additions & 13 deletions INSTALL.md
@@ -1,36 +1,59 @@
## Installation
- Set up a Python3 environment.
- Install [Pytorch](http://pytorch.org/) 1.6.0 and TorchVision.
- Install [TensorRT](https://developer.nvidia.com/tensorrt) 7.1.3.4 and [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt) 0.1.0 (*optional* for evaluating models without TensorRT, currently TensorRT optimization only supports devices with [Tensor Cores](https://www.nvidia.com/en-us/data-center/tensor-cores/), and already included in [JetPack SDK](https://developer.nvidia.com/embedded/Jetpack) if using Jetson devices):
1. Install CUDA 10.2/11.0 and cuDNN 8.0.0.
2. Download TensorRT 7.1.3.4 tar file [here](https://developer.nvidia.com/nvidia-tensorrt-7x-download) and install TensorRT (refer to [official documentation](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-713/install-guide/index.html#installing-tar) for more details).
- Install [PyTorch](http://pytorch.org/) 1.7.1 and TorchVision 0.8.2.
- Install [TensorRT](https://developer.nvidia.com/tensorrt) 8.2.1.8 and [torch2trt_dynamic](https://github.com/grimoire/torch2trt_dynamic) v0.5.0 (*optional*: only needed for evaluating models with TensorRT; TensorRT optimization currently supports only devices with [Tensor Cores](https://www.nvidia.com/en-us/data-center/tensor-cores/), and TensorRT is already included in the [JetPack SDK](https://developer.nvidia.com/embedded/Jetpack) on Jetson devices):
1. Install CUDA 10.2/11.4 and cuDNN 8.2.
2. Download TensorRT 8.2.1.8 tar file [here](https://developer.nvidia.com/nvidia-tensorrt-8x-download) and install TensorRT (refer to [official documentation](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-821/install-guide/index.html#installing-tar) for more details).
```Shell
tar xzvf TensorRT-${version}.${os}.${arch}-gnu.${cuda}.${cudnn}.tar.gz
version="8.x.x.x"
arch=$(uname -m)
cuda="cuda-x.x"
cudnn="cudnn8.x"
tar xzvf TensorRT-${version}.Linux.${arch}-gnu.${cuda}.${cudnn}.tar.gz
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<TensorRT-${version}/lib>

cd TensorRT-${version}/python
pip3 install tensorrt-*-cp3x-none-linux_x86_64.whl
python3 -m pip install tensorrt-*-cp3x-none-linux_x86_64.whl

cd TensorRT-${version}/uff
pip3 install uff-0.6.9-py2.py3-none-any.whl
python3 -m pip install uff-0.6.9-py2.py3-none-any.whl

cd TensorRT-${version}/graphsurgeon
pip3 install graphsurgeon-0.4.5-py2.py3-none-any.whl
python3 -m pip install graphsurgeon-0.4.5-py2.py3-none-any.whl
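# Optional sanity check (an extra step, not from the official install guide):
# the TensorRT Python bindings should import and report the expected version.
python3 -c "import tensorrt; print(tensorrt.__version__)"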
```
3. Install [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt).
3. Install [torch2trt_dynamic](https://github.com/grimoire/torch2trt_dynamic).
```Shell
git clone https://github.com/NVIDIA-AI-IOT/torch2trt
cd torch2trt
sudo python setup.py install --plugins
git clone https://github.com/grimoire/torch2trt_dynamic.git torch2trt_dynamic
cd torch2trt_dynamic
python setup.py develop
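# Optional quick import check (an extra step, not part of the upstream instructions)
python3 -c "import torch2trt_dynamic"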
```
4. Install the deformable convolution module for PyTorch if you want to work with yolact_edge+ models. Go to ./external/mod_def_conv and run setup.py:
```Shell
cd external/mod_def_conv
python setup.py install
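# Optional: verify the extension built and is importable
# (the module name mod_dcn_op_v2 comes from external/mod_def_conv/setup.py;
# torch is imported first so the extension can resolve the libtorch symbols)
python3 -c "import torch, mod_dcn_op_v2"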
```
5. Install [amirstan_plugin](https://github.com/grimoire/amirstan_plugin), which contains the deformable convolution plugin with dynamic shapes for TensorRT 8.x. It is needed only if you want to work with yolact_edge+ models.
```Shell
apt install -y software-properties-common
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
apt update && apt install -y cmake
git clone --depth=1 --branch v0.5.0 https://github.com/grimoire/amirstan_plugin.git
cd amirstan_plugin
cmake -DTENSORRT_DIR=/usr/lib/x86_64-linux-gnu -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc
make -j$(nproc)
export AMIRSTAN_LIBRARY_PATH=<amirstan_plugin_root>/lib
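# Optional: confirm the plugin library was built into the lib directory
ls <amirstan_plugin_root>/lib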
```


- Install some other packages:
```Shell
# Cython needs to be installed before pycocotools
pip install cython
pip install opencv-python pillow matplotlib
pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI"
pip install GitPython termcolor tensorboard
pip install GitPython termcolor tensorboard packaging
```
- Clone this repository and enter it:
```Shell
5 changes: 5 additions & 0 deletions README.md
@@ -183,6 +183,11 @@ python train.py --config=yolact_edge_vid_trainflow_config --resume=./weights/yol
python train.py --config=yolact_edge_vid_config --resume=./weights/yolact_edge_vid_trainflow_144_100000.pth
```

### Experimental
You can try training yolact_edge+ models with deformable convolutions. For installation instructions, see [INSTALL.md](INSTALL.md).
```Shell
python train.py --config=yolact_edge_plus_config
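# Evaluation after training presumably follows the same pattern as the other
# configs (an untested sketch; the checkpoint name below is a placeholder):
# python eval.py --config=yolact_edge_plus_config --trained_model=./weights/<checkpoint>.pth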
```

### Custom Datasets
You can also train on your own dataset by following these steps:
68 changes: 38 additions & 30 deletions docker/Dockerfile
@@ -1,43 +1,51 @@
FROM nvcr.io/nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04
FROM nvcr.io/nvidia/cuda:11.4.0-cudnn8-devel-ubuntu18.04


ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
git wget sudo build-essential \
git wget build-essential \
python3 python3-setuptools python3-pip python3-dev python3-tk \
ffmpeg libsm6 libxext6
RUN ln -svf /usr/bin/python3 /usr/bin/python
RUN python -m pip install --upgrade --force pip

# TensorRT
ARG version="8.0.5.39-1+cuda11.0"
RUN apt-get update && apt-get install -y libcudnn8=${version} libcudnn8-dev=${version} && apt-mark hold libcudnn8 libcudnn8-dev
ARG version="7.2.3-1+cuda11.0"
# CUDNN
ARG version="8.2.2.26-1+cuda11.4"
RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages \
libcudnn8=${version} libcudnn8-dev=${version} && apt-mark hold libcudnn8 libcudnn8-dev

# Install Tensorrt 8.2.1.8
ARG version="8.2.1-1+cuda11.4"
RUN apt-get update && \
apt-get install -y libnvinfer7=${version} libnvonnxparsers7=${version} libnvparsers7=${version} libnvinfer-plugin7=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python3-libnvinfer=${version} && \
apt-mark hold libnvinfer7 libnvonnxparsers7 libnvparsers7 libnvinfer-plugin7 libnvinfer-dev libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev python3-libnvinfer

# create a non-root user
ARG USER_ID=1000
ARG USER=appuser
RUN useradd -m --no-log-init --system --uid ${USER_ID} ${USER} -g sudo
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
USER ${USER}
WORKDIR /home/${USER}
ENV PATH="/home/${USER}/.local/bin:${PATH}"
apt-get install -y libnvinfer8=${version} libnvonnxparsers8=${version} libnvparsers8=${version} libnvinfer-plugin8=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python3-libnvinfer=${version} && \
apt-mark hold libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-dev libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev python3-libnvinfer


# Install dependencies
RUN pip install --user cython opencv-python pillow matplotlib GitPython termcolor tensorboard
RUN pip install --user git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI"
RUN pip install --user torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html

# torch2trt
RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt
WORKDIR /home/${USER}/torch2trt
RUN python setup.py install --plugins --user

WORKDIR /home/${USER}
RUN ln -s /yolact_edge
RUN ln -s /datasets
WORKDIR /home/${USER}/yolact_edge
RUN pip install cython opencv-python pillow matplotlib GitPython termcolor tensorboard packaging
RUN pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI"
RUN pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html

# torch2trt_dynamic
WORKDIR /root
RUN git clone https://github.com/grimoire/torch2trt_dynamic.git torch2trt_dynamic && \
cd torch2trt_dynamic && \
python setup.py develop

# installing plugins for torch2trt_dynamic
WORKDIR /root

RUN apt install -y software-properties-common && \
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' && \
apt update && apt install -y cmake && \
git clone --depth=1 --branch v0.5.0 https://github.com/grimoire/amirstan_plugin.git && \
cd amirstan_plugin && \
cmake -DTENSORRT_DIR=/usr/lib/x86_64-linux-gnu -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc && \
make -j$(nproc)

ENV AMIRSTAN_LIBRARY_PATH=/root/amirstan_plugin/lib

WORKDIR /root/yolact_edge

ENV LANG C.UTF-8
30 changes: 21 additions & 9 deletions docker/Dockerfile.xavier
@@ -15,7 +15,7 @@
# (ex. In this case, use AGX Xavier)
# $ docker build --build-arg L4T_IMAGE=nvcr.io/nvidia/l4t-pytorch:r32.5.0-pth1.6-py3 --build-arg JETSON_PLATFORM=t194

ARG L4T_IMAGE=nvcr.io/nvidia/l4t-pytorch:r32.4.4-pth1.6-py3
ARG L4T_IMAGE=nvcr.io/nvidia/l4t-pytorch:r32.7.1-pth1.10-py3
ARG JETSON_PLATFORM=t194

FROM ${L4T_IMAGE}
@@ -42,19 +42,31 @@ RUN L4T_REPO_VERSION=`python3 -c 'import sys; print(".".join((sys.argv[1].split(
RUN ln -svf /usr/bin/python3 /usr/bin/python
RUN python -m pip install --upgrade --force pip

# # Install dependencies
# Install dependencies
RUN pip install cython pillow matplotlib GitPython termcolor tensorboard
RUN pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI"

# torch2trt
# torch2trt_dynamic
WORKDIR /root
RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt &&\
cd torch2trt &&\
python setup.py install --plugins
RUN git clone https://github.com/grimoire/torch2trt_dynamic.git torch2trt_dynamic
WORKDIR /root/torch2trt_dynamic
RUN python setup.py develop


# installing plugins
WORKDIR /root
RUN ln -s /yolact_edge
RUN ln -s /datasets
WORKDIR /root/yolact_edge
RUN apt update && apt install -y software-properties-common && \
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' && \
apt update && apt install -y cmake && \
git clone --depth=1 https://github.com/grimoire/amirstan_plugin.git && \
cd amirstan_plugin && \
git submodule update --init --progress --depth=1 && \
cmake -DTENSORRT_DIR=/usr/include/aarch64-linux-gnu && \
make -j$(nproc)

ENV AMIRSTAN_LIBRARY_PATH=/root/amirstan_plugin/lib

WORKDIR /workspace

ENV LANG C.UTF-8
4 changes: 2 additions & 2 deletions docker/run_xavier.sh
@@ -2,7 +2,7 @@ docker build -t yolact_edge -f Dockerfile.xavier .
docker run -it --rm --net=host --privileged \
--runtime nvidia -e DISPLAY=$DISPLAY \
-v /tmp/.X11-unix/:/tmp/.X11-unix \
-v $PWD/../:/yolact_edge/:rw \
-v $PWD/../:/workspace/yolact_edge/:rw \
--device /dev/video0:/dev/video0 \
yolact_edge \
python3 eval.py --trained_model=./weights/yolact_edge_resnet50_54_800000.pth \
@@ -11,4 +11,4 @@ docker run -it --rm --net=host --privileged \
--video_multiframe=2 \
--trt_batch_size 2 \
--video=0 \
--calib_images ./data/coco/images
--calib_images ./data/coco/images
8 changes: 5 additions & 3 deletions docker/start.sh
@@ -3,8 +3,10 @@
SOURCE_CODE=$1
DATASETS=$2

docker build -t yolact_edge:11.4_cuda8.2 -f Dockerfile .

docker run --gpus all -it --name=yolact_edge \
--shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
-v $SOURCE_CODE:/yolact_edge/:rw \
--shm-size=64gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
-v $SOURCE_CODE:/root/yolact_edge/:rw \
-v $DATASETS:/datasets/:ro \
yolact_edge_image
yolact_edge:11.4_cuda8.2
16 changes: 16 additions & 0 deletions external/mod_def_conv/setup.py
@@ -0,0 +1,16 @@
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

if __name__ == '__main__':
setup(
name='mod_dcn_op_v2',
ext_modules=[
CUDAExtension(
'mod_dcn_op_v2',
sources=['src/modulated_deform_conv.cpp', 'src/modulated_deform_conv_cuda.cu'],
)
],
cmdclass={
'build_ext': BuildExtension
}
)
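
Once this extension is built (step 4 of INSTALL.md above), a minimal import check could look like the sketch below; the only assumption carried over is the module name `mod_dcn_op_v2` from the `setup()` call.
```Python
# Illustrative sanity check (not part of the commit).
# torch is imported first so the extension can resolve the libtorch symbols it links against.
import torch
import mod_dcn_op_v2

print("loaded deformable conv extension from:", mod_dcn_op_v2.__file__)
```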
112 changes: 112 additions & 0 deletions external/mod_def_conv/src/common_cuda_helper.hpp
@@ -0,0 +1,112 @@
#ifndef COMMON_CUDA_HELPER
#define COMMON_CUDA_HELPER

#include <cuda.h>

#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)

#define THREADS_PER_BLOCK 512

#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

inline int GET_BLOCKS(const int N) {
int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
int max_block_num = 4096;
return min(optimal_block_num, max_block_num);
}

template <typename T>
__device__ T bilinear_interpolate(const T* input, const int height,
const int width, T y, T x,
const int index /* index for debug only*/) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) return 0;

if (y <= 0) y = 0;
if (x <= 0) x = 0;

int y_low = (int)y;
int x_low = (int)x;
int y_high;
int x_high;

if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}

if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}

T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// do bilinear interpolation
T v1 = input[y_low * width + x_low];
T v2 = input[y_low * width + x_high];
T v3 = input[y_high * width + x_low];
T v4 = input[y_high * width + x_high];
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);

return val;
}

template <typename T>
__device__ void bilinear_interpolate_gradient(
const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4,
int& x_low, int& x_high, int& y_low, int& y_high,
const int index /* index for debug only*/) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty
w1 = w2 = w3 = w4 = 0.;
x_low = x_high = y_low = y_high = -1;
return;
}

if (y <= 0) y = 0;
if (x <= 0) x = 0;

y_low = (int)y;
x_low = (int)x;

if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}

if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}

T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;

// reference in forward
// T v1 = input[y_low * width + x_low];
// T v2 = input[y_low * width + x_high];
// T v3 = input[y_high * width + x_low];
// T v4 = input[y_high * width + x_high];
// T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);

w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

return;
}
#endif // COMMON_CUDA_HELPER
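
For readers skimming the kernel, the sampling math above boils down to a weighted average of the four neighbouring pixels. Below is a short NumPy sketch of `bilinear_interpolate` written for this summary (illustrative only; it is not part of the committed file).
```Python
import numpy as np

def bilinear_interpolate(data, y, x):
    """Sample data at fractional location (y, x), mirroring the CUDA helper."""
    height, width = data.shape
    # samples falling outside the feature map contribute zero
    if y < -1.0 or y > height or x < -1.0 or x > width:
        return 0.0
    y = max(y, 0.0)
    x = max(x, 0.0)
    y_low, x_low = int(y), int(x)
    if y_low >= height - 1:
        y_high = y_low = height - 1
        y = float(y_low)
    else:
        y_high = y_low + 1
    if x_low >= width - 1:
        x_high = x_low = width - 1
        x = float(x_low)
    else:
        x_high = x_low + 1
    ly, lx = y - y_low, x - x_low
    hy, hx = 1.0 - ly, 1.0 - lx
    # weights of the four neighbouring pixels
    w1, w2, w3, w4 = hy * hx, hy * lx, ly * hx, ly * lx
    return (w1 * data[y_low, x_low] + w2 * data[y_low, x_high]
            + w3 * data[y_high, x_low] + w4 * data[y_high, x_high])

# example: a fractional sample is the weighted average of its 4 neighbours
feat = np.arange(16, dtype=np.float32).reshape(4, 4)
print(bilinear_interpolate(feat, 1.5, 2.5))  # (6 + 7 + 10 + 11) / 4 = 8.5
```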