-
Notifications
You must be signed in to change notification settings - Fork 273
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #190 from Kracozebr/master
Add deformable conv to repo
- Loading branch information
Showing
18 changed files
with
1,739 additions
and
101 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,51 @@ | ||
FROM nvcr.io/nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04 | ||
FROM nvcr.io/nvidia/cuda:11.4.0-cudnn8-devel-ubuntu18.04 | ||
|
||
|
||
ARG DEBIAN_FRONTEND=noninteractive | ||
RUN apt-get update && apt-get install -y \ | ||
git wget sudo build-essential \ | ||
git wget build-essential \ | ||
python3 python3-setuptools python3-pip python3-dev python3-tk \ | ||
ffmpeg libsm6 libxext6 | ||
RUN ln -svf /usr/bin/python3 /usr/bin/python | ||
RUN python -m pip install --upgrade --force pip | ||
|
||
# TensorRT | ||
ARG version="8.0.5.39-1+cuda11.0" | ||
RUN apt-get update && apt-get install -y libcudnn8=${version} libcudnn8-dev=${version} && apt-mark hold libcudnn8 libcudnn8-dev | ||
ARG version="7.2.3-1+cuda11.0" | ||
# CUDNN | ||
ARG version="8.2.2.26-1+cuda11.4" | ||
RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages \ | ||
libcudnn8=${version} libcudnn8-dev=${version} && apt-mark hold libcudnn8 libcudnn8-dev | ||
|
||
# Install TensorRT 8.2.1.8 | ||
ARG version="8.2.1-1+cuda11.4" | ||
RUN apt-get update && \ | ||
apt-get install -y libnvinfer7=${version} libnvonnxparsers7=${version} libnvparsers7=${version} libnvinfer-plugin7=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python3-libnvinfer=${version} && \ | ||
apt-mark hold libnvinfer7 libnvonnxparsers7 libnvparsers7 libnvinfer-plugin7 libnvinfer-dev libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev python3-libnvinfer | ||
|
||
# create a non-root user | ||
ARG USER_ID=1000 | ||
ARG USER=appuser | ||
RUN useradd -m --no-log-init --system --uid ${USER_ID} ${USER} -g sudo | ||
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers | ||
USER ${USER} | ||
WORKDIR /home/${USER} | ||
ENV PATH="/home/${USER}/.local/bin:${PATH}" | ||
apt-get install -y libnvinfer8=${version} libnvonnxparsers8=${version} libnvparsers8=${version} libnvinfer-plugin8=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python3-libnvinfer=${version} && \ | ||
apt-mark hold libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-dev libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev python3-libnvinfer | ||
|
||
|
||
# # Install dependencies | ||
RUN pip install --user cython opencv-python pillow matplotlib GitPython termcolor tensorboard | ||
RUN pip install --user git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI" | ||
RUN pip install --user torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html | ||
|
||
# torch2trt | ||
RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt | ||
WORKDIR /home/${USER}/torch2trt | ||
RUN python setup.py install --plugins --user | ||
|
||
WORKDIR /home/${USER} | ||
RUN ln -s /yolact_edge | ||
RUN ln -s /datasets | ||
WORKDIR /home/${USER}/yolact_edge | ||
RUN pip install cython opencv-python pillow matplotlib GitPython termcolor tensorboard packaging | ||
RUN pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI" | ||
RUN pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html | ||
|
||
# torch2trt_dynamic | ||
WORKDIR /root | ||
RUN git clone https://github.com/grimoire/torch2trt_dynamic.git torch2trt_dynamic && \ | ||
cd torch2trt_dynamic && \ | ||
python setup.py develop | ||
|
||
# installing plugins for torch2trt_dynamic | ||
WORKDIR /root | ||
|
||
RUN apt install -y software-properties-common && \ | ||
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ | ||
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' && \ | ||
apt update && apt install -y cmake && \ | ||
git clone --depth=1 --branch v0.5.0 https://github.com/grimoire/amirstan_plugin.git && \ | ||
cd amirstan_plugin && \ | ||
cmake -DTENSORRT_DIR=/usr/lib/x86_64-linux-gnu -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc && \ | ||
make -j$(nproc) | ||
|
||
ENV AMIRSTAN_LIBRARY_PATH=/root/amirstan_plugin/lib | ||
|
||
WORKDIR /root/yolact_edge | ||
|
||
ENV LANG C.UTF-8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from setuptools import setup | ||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension | ||
|
||
if __name__ == '__main__':
    # Describe the single CUDA extension: one C++ source and one CUDA source,
    # both located under src/.
    dcn_extension = CUDAExtension(
        'mod_dcn_op_v2',
        sources=[
            'src/modulated_deform_conv.cpp',
            'src/modulated_deform_conv_cuda.cu',
        ],
    )

    # Register the extension under the package name 'mod_dcn_op_v2' and route
    # the build_ext command through torch's BuildExtension.
    setup(
        name='mod_dcn_op_v2',
        ext_modules=[dcn_extension],
        cmdclass={'build_ext': BuildExtension},
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#ifndef COMMON_CUDA_HELPER | ||
#define COMMON_CUDA_HELPER | ||
|
||
#include <cuda.h> | ||
|
||
// Loop header that declares an int `i`, starts it at
// blockIdx.x * blockDim.x + threadIdx.x, keeps going while i < n, and
// advances it by blockDim.x * gridDim.x on every iteration. The statement
// (or braced block) written right after the macro becomes the loop body.
#define CUDA_1D_KERNEL_LOOP(i, n)                              \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)

// Threads-per-block constant; GET_BLOCKS divides the work size by this value.
#define THREADS_PER_BLOCK 512

// Integer quotient m / n, plus one when the remainder m % n is positive.
// Both arguments are expanded more than once, so avoid side effects in them.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
|
||
inline int GET_BLOCKS(const int N) { | ||
int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; | ||
int max_block_num = 4096; | ||
return min(optimal_block_num, max_block_num); | ||
} | ||
|
||
// Bilinearly samples `input` at the fractional location (y, x).
//
// `input` is read as input[row * width + col]. The result is 0 when y or x is
// below -1 or above height / width respectively. Coordinates at or below 0 are
// moved to 0, and a location whose integer part reaches the last row (or
// column) is snapped onto that row (or column), so both neighbours coincide.
// `index` is not used by the computation (kept for debugging).
template <typename T>
__device__ T bilinear_interpolate(const T* input, const int height,
                                  const int width, T y, T x,
                                  const int index /* index for debug only*/) {
  // Sample points outside the accepted band contribute nothing.
  const bool out_of_range = y < -1.0 || y > height || x < -1.0 || x > width;
  if (out_of_range) return 0;

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  int row_lo = (int)y;
  int col_lo = (int)x;

  // Snap onto the last row when there is no row below to blend with.
  const bool on_last_row = row_lo >= height - 1;
  if (on_last_row) {
    row_lo = height - 1;
    y = (T)row_lo;
  }
  const int row_hi = on_last_row ? row_lo : row_lo + 1;

  // Same treatment for the last column.
  const bool on_last_col = col_lo >= width - 1;
  if (on_last_col) {
    col_lo = width - 1;
    x = (T)col_lo;
  }
  const int col_hi = on_last_col ? col_lo : col_lo + 1;

  // Fractional offsets from the low corner and their complements.
  const T frac_y = y - row_lo;
  const T frac_x = x - col_lo;
  const T rest_y = 1. - frac_y;
  const T rest_x = 1. - frac_x;

  // Fetch the four neighbouring samples.
  const int lo_row_base = row_lo * width;
  const int hi_row_base = row_hi * width;
  const T val_ll = input[lo_row_base + col_lo];
  const T val_lh = input[lo_row_base + col_hi];
  const T val_hl = input[hi_row_base + col_lo];
  const T val_hh = input[hi_row_base + col_hi];

  const T wt_ll = rest_y * rest_x;
  const T wt_lh = rest_y * frac_x;
  const T wt_hl = frac_y * rest_x;
  const T wt_hh = frac_y * frac_x;

  const T blended =
      (wt_ll * val_ll + wt_lh * val_lh + wt_hl * val_hl + wt_hh * val_hh);
  return blended;
}
|
||
// Computes the four bilinear weights and the four neighbour indices for the
// fractional location (y, x) and writes them to the reference outputs.
//
// When y or x is below -1 or above height / width respectively, all weights
// are set to 0 and all indices to -1. Otherwise coordinates at or below 0 are
// moved to 0, a location on or past the last row/column is snapped onto it,
// and the weights pair up as:
//   w1 -> (y_low, x_low), w2 -> (y_low, x_high),
//   w3 -> (y_high, x_low), w4 -> (y_high, x_high)
// `index` is not used by the computation (kept for debugging).
template <typename T>
__device__ void bilinear_interpolate_gradient(
    const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4,
    int& x_low, int& x_high, int& y_low, int& y_high,
    const int index /* index for debug only*/) {
  const bool out_of_range = y < -1.0 || y > height || x < -1.0 || x > width;
  if (out_of_range) {
    // Nothing to sample: zero weights and sentinel indices.
    w4 = 0.;
    w3 = w4;
    w2 = w3;
    w1 = w2;
    y_high = -1;
    y_low = y_high;
    x_high = y_low;
    x_low = x_high;
    return;
  }

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  y_low = (int)y;
  x_low = (int)x;

  // Rows: snap onto the last row, otherwise pair with the next row.
  if (y_low >= height - 1) {
    y_low = height - 1;
    y_high = y_low;
    y = (T)y_low;
  } else {
    y_high = y_low + 1;
  }

  // Columns: same rule as for rows.
  if (x_low >= width - 1) {
    x_low = width - 1;
    x_high = x_low;
    x = (T)x_low;
  } else {
    x_high = x_low + 1;
  }

  // Fractional offsets from the low corner and their complements.
  const T frac_y = y - y_low;
  const T frac_x = x - x_low;
  const T rest_y = 1. - frac_y;
  const T rest_x = 1. - frac_x;

  w1 = rest_y * rest_x;
  w2 = rest_y * frac_x;
  w3 = frac_y * rest_x;
  w4 = frac_y * frac_x;
}
#endif // COMMON_CUDA_HELPER |
Oops, something went wrong.