Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add deformable conv to repo #190

Merged
merged 4 commits into from
Oct 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 36 additions & 13 deletions INSTALL.md
Original file line number Diff line number Diff line change
@@ -1,36 +1,59 @@
## Installation
- Set up a Python3 environment.
- Install [Pytorch](http://pytorch.org/) 1.6.0 and TorchVision.
- Install [TensorRT](https://developer.nvidia.com/tensorrt) 7.1.3.4 and [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt) 0.1.0 (*optional* for evaluating models without TensorRT, currently TensorRT optimization only supports devices with [Tensor Cores](https://www.nvidia.com/en-us/data-center/tensor-cores/), and already included in [JetPack SDK](https://developer.nvidia.com/embedded/Jetpack) if using Jetson devices):
1. Install CUDA 10.2/11.0 and cuDNN 8.0.0.
2. Download TensorRT 7.1.3.4 tar file [here](https://developer.nvidia.com/nvidia-tensorrt-7x-download) and install TensorRT (refer to [official documentation](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-713/install-guide/index.html#installing-tar) for more details).
- Install [Pytorch](http://pytorch.org/) 1.7.1 and TorchVision v.0.8.2.
- Install [TensorRT](https://developer.nvidia.com/tensorrt) 8.2.1.8 and [torch2trt_dynamic](https://github.com/grimoire/torch2trt_dynamic) v0.5.0 (*optional* for evaluating models without TensorRT, currently TensorRT optimization only supports devices with [Tensor Cores](https://www.nvidia.com/en-us/data-center/tensor-cores/), and already included in [JetPack SDK](https://developer.nvidia.com/embedded/Jetpack) if using Jetson devices):
1. Install CUDA 10.2/11.4 and cuDNN 8.2.
2. Download TensorRT 8.2.1.8 tar file [here](https://developer.nvidia.com/nvidia-tensorrt-8x-download) and install TensorRT (refer to [official documentation](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-821/install-guide/index.html#installing-tar) for more details).
```Shell
tar xzvf TensorRT-${version}.${os}.${arch}-gnu.${cuda}.${cudnn}.tar.gz
version="8.x.x.x"
arch=$(uname -m)
cuda="cuda-x.x"
cudnn="cudnn8.x"
tar xzvf TensorRT-${version}.Linux.${arch}-gnu.${cuda}.${cudnn}.tar.gz
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<TensorRT-${version}/lib>

cd TensorRT-${version}/python
pip3 install tensorrt-*-cp3x-none-linux_x86_64.whl
python3 -m pip install tensorrt-*-cp3x-none-linux_x86_64.whl

cd TensorRT-${version}/uff
pip3 install uff-0.6.9-py2.py3-none-any.whl
python3 -m pip install uff-0.6.9-py2.py3-none-any.whl

cd TensorRT-${version}/graphsurgeon
pip3 install graphsurgeon-0.4.5-py2.py3-none-any.whl
python3 -m pip install graphsurgeon-0.4.5-py2.py3-none-any.whl
```
3. Install [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt).
3. Install [torch2trt_dynamic](https://github.com/grimoire/torch2trt_dynamic).
```Shell
git clone https://github.com/NVIDIA-AI-IOT/torch2trt
cd torch2trt
sudo python setup.py install --plugins
git clone https://github.com/grimoire/torch2trt_dynamic.git torch2trt_dynamic
cd torch2trt_dynamic
python setup.py develop
```
4. Install the deformable convolution module for PyTorch if you want to work with yolact_edge+ models: go to ./external/mod_def_conv and run setup.py.
```Shell
cd external/mod_def_conv
python setup.py install
```
5. Install [amirstan_plugin](https://github.com/grimoire/amirstan_plugin), which contains the deformable convolution plugin with dynamic shapes for TensorRT 8.x. It is needed only if you want to work with yolact edge+ models.
```Shell
apt install -y software-properties-common
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
apt update && apt install -y cmake
git clone --depth=1 --branch v0.5.0 https://github.com/grimoire/amirstan_plugin.git
cd amirstan_plugin
cmake -DTENSORRT_DIR=/usr/lib/x86_64-linux-gnu -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc
make -j$(nproc)

export AMIRSTAN_LIBRARY_PATH=<amirstan_plugin_root>/lib
```


- Install some other packages:
```Shell
# Cython needs to be installed before pycocotools
pip install cython
pip install opencv-python pillow matplotlib
pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI"
pip install GitPython termcolor tensorboard
pip install GitPython termcolor tensorboard packaging
```
- Clone this repository and enter it:
```Shell
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ python train.py --config=yolact_edge_vid_trainflow_config --resume=./weights/yol
python train.py --config=yolact_edge_vid_config --resume=./weights/yolact_edge_vid_trainflow_144_100000.pth
```

### Experimental
One can try to train yolact edge+ models with deformable convolutions. For installation instructions, see [INSTALL.md](INSTALL.md).
```Shell
python train.py --config=yolact_edge_plus_config
```

### Custom Datasets
You can also train on your own dataset by following these steps:
Expand Down
68 changes: 38 additions & 30 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,43 +1,51 @@
FROM nvcr.io/nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04
FROM nvcr.io/nvidia/cuda:11.4.0-cudnn8-devel-ubuntu18.04


ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
git wget sudo build-essential \
git wget build-essential \
python3 python3-setuptools python3-pip python3-dev python3-tk \
ffmpeg libsm6 libxext6
RUN ln -svf /usr/bin/python3 /usr/bin/python
RUN python -m pip install --upgrade --force pip

# TensorRT
ARG version="8.0.5.39-1+cuda11.0"
RUN apt-get update && apt-get install -y libcudnn8=${version} libcudnn8-dev=${version} && apt-mark hold libcudnn8 libcudnn8-dev
ARG version="7.2.3-1+cuda11.0"
# CUDNN
ARG version="8.2.2.26-1+cuda11.4"
RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages \
libcudnn8=${version} libcudnn8-dev=${version} && apt-mark hold libcudnn8 libcudnn8-dev

# Install Tensorrt 8.2.1.8
ARG version="8.2.1-1+cuda11.4"
RUN apt-get update && \
apt-get install -y libnvinfer7=${version} libnvonnxparsers7=${version} libnvparsers7=${version} libnvinfer-plugin7=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python3-libnvinfer=${version} && \
apt-mark hold libnvinfer7 libnvonnxparsers7 libnvparsers7 libnvinfer-plugin7 libnvinfer-dev libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev python3-libnvinfer

# create a non-root user
ARG USER_ID=1000
ARG USER=appuser
RUN useradd -m --no-log-init --system --uid ${USER_ID} ${USER} -g sudo
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
USER ${USER}
WORKDIR /home/${USER}
ENV PATH="/home/${USER}/.local/bin:${PATH}"
apt-get install -y libnvinfer8=${version} libnvonnxparsers8=${version} libnvparsers8=${version} libnvinfer-plugin8=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python3-libnvinfer=${version} && \
apt-mark hold libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-dev libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev python3-libnvinfer


# # Install dependencies
RUN pip install --user cython opencv-python pillow matplotlib GitPython termcolor tensorboard
RUN pip install --user git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI"
RUN pip install --user torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html

# torch2trt
RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt
WORKDIR /home/${USER}/torch2trt
RUN python setup.py install --plugins --user

WORKDIR /home/${USER}
RUN ln -s /yolact_edge
RUN ln -s /datasets
WORKDIR /home/${USER}/yolact_edge
RUN pip install cython opencv-python pillow matplotlib GitPython termcolor tensorboard packaging
RUN pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI"
RUN pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html

# torch2trt_dynamic
WORKDIR /root
RUN git clone https://github.com/grimoire/torch2trt_dynamic.git torch2trt_dynamic && \
cd torch2trt_dynamic && \
python setup.py develop

# installing plugins for torch2trt_dynamic
WORKDIR /root

RUN apt install -y software-properties-common && \
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' && \
apt update && apt install -y cmake && \
git clone --depth=1 --branch v0.5.0 https://github.com/grimoire/amirstan_plugin.git && \
cd amirstan_plugin && \
cmake -DTENSORRT_DIR=/usr/lib/x86_64-linux-gnu -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc && \
make -j$(nproc)

ENV AMIRSTAN_LIBRARY_PATH=/root/amirstan_plugin/lib

WORKDIR /root/yolact_edge

ENV LANG C.UTF-8
30 changes: 21 additions & 9 deletions docker/Dockerfile.xavier
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# (ex. In this case, use AGX Xavier)
# $ docker build --build-arg L4T_IMAGE=nvcr.io/nvidia/l4t-pytorch:r32.5.0-pth1.6-py3 --build-arg JETSON_PLATFORM=t194

ARG L4T_IMAGE=nvcr.io/nvidia/l4t-pytorch:r32.4.4-pth1.6-py3
ARG L4T_IMAGE=nvcr.io/nvidia/l4t-pytorch:r32.7.1-pth1.10-py3
ARG JETSON_PLATFORM=t194

FROM ${L4T_IMAGE}
Expand All @@ -42,19 +42,31 @@ RUN L4T_REPO_VERSION=`python3 -c 'import sys; print(".".join((sys.argv[1].split(
RUN ln -svf /usr/bin/python3 /usr/bin/python
RUN python -m pip install --upgrade --force pip

# # Install dependencies
# Install dependencies
RUN pip install cython pillow matplotlib GitPython termcolor tensorboard
RUN pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI"

# torch2trt
# torch2trt_dynamic
WORKDIR /root
RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt &&\
cd torch2trt &&\
python setup.py install --plugins
RUN git clone https://github.com/grimoire/torch2trt_dynamic.git torch2trt_dynamic
WORKDIR /root/torch2trt_dynamic
RUN python setup.py develop


# installing plugins
WORKDIR /root
RUN ln -s /yolact_edge
RUN ln -s /datasets
WORKDIR /root/yolact_edge
RUN apt update && apt install -y software-properties-common && \
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' && \
apt update && apt install -y cmake && \
git clone --depth=1 https://github.com/grimoire/amirstan_plugin.git && \
cd amirstan_plugin && \
git submodule update --init --progress --depth=1 && \
cmake -DTENSORRT_DIR=/usr/include/aarch64-linux-gnu && \
make -j$(nproc)

ENV AMIRSTAN_LIBRARY_PATH=/root/amirstan_plugin/lib

WORKDIR /workspace

ENV LANG C.UTF-8
4 changes: 2 additions & 2 deletions docker/run_xavier.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ docker build -t yolact_edge -f Dockerfile.xavier .
docker run -it --rm --net=host --privileged \
--runtime nvidia -e DISPLAY=$DISPLAY \
-v /tmp/.X11-unix/:/tmp/.X11-unix \
-v $PWD/../:/yolact_edge/:rw \
-v $PWD/../:/workspace/yolact_edge/:rw \
--device /dev/video0:/dev/video0 \
yolact_edge \
python3 eval.py --trained_model=./weights/yolact_edge_resnet50_54_800000.pth \
Expand All @@ -11,4 +11,4 @@ docker run -it --rm --net=host --privileged \
--video_multiframe=2 \
--trt_batch_size 2 \
--video=0 \
--calib_images ./data/coco/images
--calib_images ./data/coco/images
8 changes: 5 additions & 3 deletions docker/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
SOURCE_CODE=$1
DATASETS=$2

docker build -t yolact_edge:11.4_cuda8.2 -f Dockerfile .

docker run --gpus all -it --name=yolact_edge \
--shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
-v $SOURCE_CODE:/yolact_edge/:rw \
--shm-size=64gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
-v $SOURCE_CODE:/root/yolact_edge/:rw \
-v $DATASETS:/datasets/:ro \
yolact_edge_image
yolact_edge:11.4_cuda8.2
16 changes: 16 additions & 0 deletions external/mod_def_conv/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

if __name__ == '__main__':
    # C++ and CUDA sources of the extension, relative to this setup.py.
    op_sources = [
        'src/modulated_deform_conv.cpp',
        'src/modulated_deform_conv_cuda.cu',
    ]

    # Single CUDA extension; the extension module shares the package name.
    dcn_extension = CUDAExtension('mod_dcn_op_v2', sources=op_sources)

    setup(
        name='mod_dcn_op_v2',
        ext_modules=[dcn_extension],
        # Delegate building to PyTorch's BuildExtension command.
        cmdclass={'build_ext': BuildExtension},
    )
112 changes: 112 additions & 0 deletions external/mod_def_conv/src/common_cuda_helper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#ifndef COMMON_CUDA_HELPER
#define COMMON_CUDA_HELPER

#include <cuda.h>

// Grid-stride loop header for 1D kernels: declares `int i`, starting at this
// thread's global linear index (blockIdx.x * blockDim.x + threadIdx.x) and
// advancing by the total number of threads in the grid (blockDim.x *
// gridDim.x) while i < n. Use as `CUDA_1D_KERNEL_LOOP(i, n) { ... }`.
// Note that the loop index is a plain `int`.
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)

#define THREADS_PER_BLOCK 512

#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

/**
 * Number of thread blocks to launch for N work items.
 *
 * Computes ceil(N / THREADS_PER_BLOCK) and caps the result at 4096 blocks.
 * Returns 0 when N is 0. N is expected to be non-negative; the result for a
 * negative N is not meaningful.
 *
 * @param N Number of work items.
 * @return  min(ceil(N / THREADS_PER_BLOCK), 4096).
 */
inline int GET_BLOCKS(const int N) {
  const int max_block_num = 4096;
  // Ceil-divide with DIVUP instead of (N + THREADS_PER_BLOCK - 1) /
  // THREADS_PER_BLOCK: the latter overflows `int` (undefined behaviour) once
  // N is within THREADS_PER_BLOCK - 1 of INT_MAX.
  const int optimal_block_num = DIVUP(N, THREADS_PER_BLOCK);
  // Plain comparison rather than an unqualified `min`, so this host function
  // does not depend on which headers happen to provide a global `min`.
  return optimal_block_num < max_block_num ? optimal_block_num : max_block_num;
}

/**
 * Bilinearly samples a row-major `height` x `width` map at the fractional
 * location (y, x).
 *
 * Locations with y < -1, y > height, x < -1 or x > width yield 0. Any other
 * location is clamped onto the map before the four neighbouring values are
 * blended.
 *
 * @param input  Pointer to the first element of the map.
 * @param height Number of rows in the map.
 * @param width  Number of columns in the map.
 * @param y      Row coordinate of the sample point.
 * @param x      Column coordinate of the sample point.
 * @param index  Not used by the computation (debug aid only).
 * @return       The interpolated value, or 0 for an out-of-range location.
 */
template <typename T>
__device__ T bilinear_interpolate(const T* input, const int height,
                                  const int width, T y, T x,
                                  const int index /* index for debug only*/) {
  // Sample point lies too far outside the feature map: contributes nothing.
  const bool out_of_range = y < -1.0 || y > height || x < -1.0 || x > width;
  if (out_of_range) return 0;

  // Points in the one-pixel margin above / left of the map snap to the edge.
  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  // Rows of the 2x2 neighbourhood; collapse onto the last row at the border.
  int row_lo = static_cast<int>(y);
  int col_lo = static_cast<int>(x);
  int row_hi;
  int col_hi;

  if (row_lo < height - 1) {
    row_hi = row_lo + 1;
  } else {
    row_lo = row_hi = height - 1;
    y = static_cast<T>(row_lo);
  }

  // Columns of the 2x2 neighbourhood; collapse onto the last column likewise.
  if (col_lo < width - 1) {
    col_hi = col_lo + 1;
  } else {
    col_lo = col_hi = width - 1;
    x = static_cast<T>(col_lo);
  }

  // Fractional offsets from the low corner and their complements.
  const T frac_y = y - row_lo;
  const T frac_x = x - col_lo;
  const T rest_y = 1. - frac_y;
  const T rest_x = 1. - frac_x;

  // Fetch the four corner values.
  const int top = row_lo * width;
  const int bottom = row_hi * width;
  const T top_left = input[top + col_lo];
  const T top_right = input[top + col_hi];
  const T bottom_left = input[bottom + col_lo];
  const T bottom_right = input[bottom + col_hi];

  // Blend the corners with their bilinear weights.
  const T w_tl = rest_y * rest_x;
  const T w_tr = rest_y * frac_x;
  const T w_bl = frac_y * rest_x;
  const T w_br = frac_y * frac_x;

  const T blended = (w_tl * top_left + w_tr * top_right + w_bl * bottom_left +
                     w_br * bottom_right);
  return blended;
}

/**
 * Computes the four bilinear weights and the neighbour indices for a sample
 * at (y, x) on a `height` x `width` map, without reading any map data.
 *
 * The matching forward computation is
 *   val = w1 * in[y_low][x_low]  + w2 * in[y_low][x_high] +
 *         w3 * in[y_high][x_low] + w4 * in[y_high][x_high].
 *
 * For locations with y < -1, y > height, x < -1 or x > width, all weights
 * are set to 0 and all four indices to -1.
 *
 * @param height         Number of rows in the map.
 * @param width          Number of columns in the map.
 * @param y              Row coordinate of the sample point.
 * @param x              Column coordinate of the sample point.
 * @param w1,w2,w3,w4    Output: weights of the (low,low), (low,high),
 *                       (high,low) and (high,high) neighbours, in (row, col)
 *                       order.
 * @param x_low,x_high   Output: column indices of the neighbours.
 * @param y_low,y_high   Output: row indices of the neighbours.
 * @param index          Not used by the computation (debug aid only).
 */
template <typename T>
__device__ void bilinear_interpolate_gradient(
    const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4,
    int& x_low, int& x_high, int& y_low, int& y_high,
    const int index /* index for debug only*/) {
  // Sample point lies too far outside the feature map: report "no neighbours".
  const bool out_of_range = y < -1.0 || y > height || x < -1.0 || x > width;
  if (out_of_range) {
    w4 = 0.;
    w3 = 0.;
    w2 = 0.;
    w1 = 0.;
    y_high = -1;
    y_low = -1;
    x_high = -1;
    x_low = -1;
    return;
  }

  // Points in the one-pixel margin above / left of the map snap to the edge.
  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  y_low = static_cast<int>(y);
  x_low = static_cast<int>(x);

  // Row pair; collapse onto the last row at the bottom border.
  if (y_low < height - 1) {
    y_high = y_low + 1;
  } else {
    y_high = y_low = height - 1;
    y = static_cast<T>(y_low);
  }

  // Column pair; collapse onto the last column at the right border.
  if (x_low < width - 1) {
    x_high = x_low + 1;
  } else {
    x_high = x_low = width - 1;
    x = static_cast<T>(x_low);
  }

  // Fractional offsets from the low corner and their complements.
  const T frac_y = y - y_low;
  const T frac_x = x - x_low;
  const T rest_y = 1. - frac_y;
  const T rest_x = 1. - frac_x;

  w1 = rest_y * rest_x;
  w2 = rest_y * frac_x;
  w3 = frac_y * rest_x;
  w4 = frac_y * frac_x;
}
#endif // COMMON_CUDA_HELPER
Loading