diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bcb9644
--- /dev/null
+++ b/README.md
@@ -0,0 +1,225 @@
+
+# Exportable DensePose inference using TorchScript
+
+### This is an unofficial inference implementation of [DensePose from detectron2](https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose)
+
+The project is focused on providing a simple, TorchScript-compilable inference interface for the original pretrained
+models, freeing them from the heavy dependency on the detectron2 framework.
+
+#### Only inference is supported, no training. Confidence estimation and bootstrapping pipelines are not implemented either.
+
+# Quickstart
+To run an already exported model (which you can find in the
+[Releases](https://github.com/dajes/DensePose-TorchScript/releases) section) you only need PyTorch and OpenCV
+(for image reading):
+
+```
+pip install torch torchvision opencv-python
+```
+
+Then you can run the model using the small example script:
+
+```
+python run.py <exported_model.pt> <image_or_video>
+```
+This will run the model and save the result in the same directory as the input.
+
+
+## Exporting a model by yourself
+
+To export a model you need a model checkpoint and a config file. You can find both in the tables below.
+
+```
+python export.py <config.yaml> <model.pkl> [--fp16]
+```
+
+If --fp16 is specified, the model is exported in fp16 mode. This reduces the model size at the cost of
+precision.
+
+Example of exporting the R_50_FPN_s1x_legacy model as an fp16 module:
+
+```
+python export.py configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl --fp16
+```
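+
+Once exported, the TorchScript module is self-contained and can also be called directly from Python instead of
+going through run.py. A minimal sketch, mirroring what run.py does (the file names are placeholders, and the
+`End2EndVisualizer` import is assumed to come from this repository's visualizer.py):
+
+```python
+import cv2
+import torch
+
+from visualizer import End2EndVisualizer  # assumed import path within this repository
+
+# Load the exported module; "model.pt" and "image.jpg" are placeholder paths.
+predictor = torch.jit.load("model.pt").eval()
+predictor = predictor.cuda().half() if torch.cuda.is_available() else predictor.float()
+
+img = cv2.imread("image.jpg")               # BGR uint8 HWC frame, same as run.py feeds the model
+outputs = predictor(torch.from_numpy(img))  # detections dict ('pred_boxes', 'scores', 'pred_classes', ...)
+
+visualizer = End2EndVisualizer(alpha=0.7, keep_bg=False)
+cv2.imwrite("image_pred.jpg", visualizer.visualize(img, outputs))
+```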
+
+### License
+
+All models available for download are licensed under the
+[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/)
+
+### Legacy Models
+
+Baselines trained using schedules from [Güler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf)
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| R_50_FPN_s1x_legacy | s1x | 0.307 | 0.051 | 3.2 | 58.1 | 58.2 | 52.1 | 54.9 | 164832157 | model \| metrics |
+| R_101_FPN_s1x_legacy | s1x | 0.390 | 0.063 | 4.3 | 59.5 | 59.3 | 53.2 | 56.0 | 164832182 | model \| metrics |
+
+```
+python export.py configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl
+```
+
+```
+python export.py configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/model_final_10af0e.pkl
+```
+
+### Improved Baselines, Original Fully Convolutional Head
+
+These models use an improved training schedule and Panoptic FPN head
+from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446).
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| R_50_FPN_s1x | s1x | 0.359 | 0.066 | 4.5 | 61.2 | 67.2 | 63.7 | 65.3 | 165712039 | model \| metrics |
+| R_101_FPN_s1x | s1x | 0.428 | 0.079 | 5.8 | 62.3 | 67.8 | 64.5 | 66.2 | 165712084 | model \| metrics |
+
+```
+python export.py configs/densepose_rcnn_R_50_FPN_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl
+```
+
+```
+python export.py configs/densepose_rcnn_R_101_FPN_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/model_final_c6ab63.pkl
+```
+
+### Improved Baselines, DeepLabV3 Head
+
+These models use an improved training schedule, Panoptic FPN head
+from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head
+from [Chen et al, 2017](https://arxiv.org/abs/1706.05587).
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| R_50_FPN_DL_s1x | s1x | 0.392 | 0.070 | 6.7 | 61.1 | 68.3 | 65.6 | 66.7 | 165712097 | model \| metrics |
+| R_101_FPN_DL_s1x | s1x | 0.478 | 0.083 | 7.0 | 62.3 | 68.7 | 66.3 | 67.6 | 165712116 | model \| metrics |
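+
+The two commands below export these checkpoints individually. If you want to export several entries from the
+tables above in one go, a small driver script along these lines also works (a sketch; the config/checkpoint
+pairs are the same ones used in the commands in this README, and `--fp16` can be appended if desired):
+
+```python
+import subprocess
+
+# (config file, checkpoint URL) pairs copied from this README
+MODELS = [
+    ("configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml",
+     "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/model_final_0ed407.pkl"),
+    ("configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml",
+     "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/model_final_844d15.pkl"),
+]
+
+for cfg, ckpt in MODELS:
+    # equivalent to running `python export.py <cfg> <ckpt>` by hand
+    subprocess.run(["python", "export.py", cfg, ckpt], check=True)
+```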
+
+```
+python export.py configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/model_final_0ed407.pkl
+```
+
+```
+python export.py configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/model_final_844d15.pkl
+```
+
+```
+@InProceedings{Guler2018DensePose,
+  title={DensePose: Dense Human Pose Estimation In The Wild},
+  author={R{\i}za Alp G\"uler and Natalia Neverova and Iasonas Kokkinos},
+  booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2018}
+}
+```
\ No newline at end of file
diff --git a/detectron2/modeling/postprocessing.py b/detectron2/modeling/postprocessing.py
index d169658..8340014 100644
--- a/detectron2/modeling/postprocessing.py
+++ b/detectron2/modeling/postprocessing.py
@@ -47,12 +47,11 @@ def detector_postprocess(

     output_boxes = results['pred_boxes']
     scale_boxes(output_boxes, scale_x, scale_y)
-    clip_boxes(output_boxes, results['image_size'])
     keep = nonempty_boxes(output_boxes)

     return {
         'image_size': new_size,
-        'pred_boxes': output_boxes[keep],
+        'pred_boxes': clip_boxes(output_boxes[keep], new_size),
         'scores': results['scores'][keep],
         'pred_classes': results['pred_classes'][keep],
         'pred_densepose_coarse_segm': results['pred_densepose_coarse_segm'][keep],
diff --git a/export.py b/export.py
index c05119d..0f962cd 100644
--- a/export.py
+++ b/export.py
@@ -12,7 +12,7 @@ def main():
     parser = argparse.ArgumentParser(description='Export DensePose model to TorchScript module')
     parser.add_argument("cfg", type=str, help="Config file")
     parser.add_argument("model", type=str, help="Model file")
-    parser.add_argument("--min_score", default=0.8, type=float,
+    parser.add_argument("--min_score", default=0.3, type=float,
                         help="Minimum detection score to visualize")
     parser.add_argument("--nms_thresh", metavar="", default=None, type=float,
                         help="NMS threshold")
diff --git a/run.py b/run.py
index 718bdb4..d82f10b 100644
--- a/run.py
+++ b/run.py
@@ -1,5 +1,6 @@
 import argparse
 import os
+from itertools import count

 import cv2
 import torch
@@ -11,23 +12,53 @@ parser.add_argument("model", type=str, help="Model file")
 parser.add_argument("input", type=str, help="Input data")
 parser.add_argument("--cpu", action="store_true", help="Only use CPU")
+parser.add_argument("--fp32", action="store_true", help="Only use FP32")
 args = parser.parse_args()

-file_list = [args.input]
-img = cv2.imread(args.input)
-tensor = torch.from_numpy(img)
-
-visualizer = End2EndVisualizer(alpha=1.0, keep_bg=False)
-predictor = torch.jit.load(args.model)
+visualizer = End2EndVisualizer(alpha=.7, keep_bg=False)
+predictor = torch.jit.load(args.model).eval()

 if torch.cuda.is_available() and not args.cpu:
-    tensor = tensor.cuda()
+    device = torch.device("cuda")
     predictor = predictor.cuda()
+    if args.fp32:
+        predictor = predictor.float()
+    else:
+        predictor = predictor.half()
 else:
+    device = torch.device("cpu")
     predictor = predictor.float()

-outputs = predictor(tensor)
-image_vis = visualizer.visualize(img, outputs)
-
 save_path = "_pred".join(os.path.splitext(args.input))
-cv2.imwrite(save_path, image_vis)
-print(f"Image saved to {save_path}")
+if os.path.splitext(args.input)[1].lower() in [".jpg", ".png", ".jpeg", ".bmp", ".tif", ".tiff"]:
+    img = cv2.imread(args.input)
+    tensor = torch.from_numpy(img)
+
+    outputs = predictor(tensor)
+    image_vis = visualizer.visualize(img, outputs)
+
+    cv2.imwrite(save_path, image_vis)
+    print(f"Image saved to {save_path}")
+else:
+    cap = cv2.VideoCapture(args.input)
+    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    writer = None
+    try:
+        for i in count():
+            ret, frame = cap.read()
+            if not ret:
+                break
+            tensor = torch.from_numpy(frame)
+            outputs = predictor(tensor)
+            image_vis = visualizer.visualize(frame, outputs)
+            if writer is None:
+                writer = cv2.VideoWriter(
+                    save_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (image_vis.shape[1], image_vis.shape[0]))
+            writer.write(image_vis)
+            print(f"Frame {i + 1}/{n_frames} processed", end="\r")
+    except KeyboardInterrupt:
+        pass
+    if writer is not None:
+        writer.release()
+        print(f"Video saved to {save_path}")
+    else:
+        print("No frames processed")
diff --git a/visualizer.py b/visualizer.py
index d4e6532..83b63fd 100644
--- a/visualizer.py
+++ b/visualizer.py
@@ -21,8 +21,8 @@ def resample_uv_tensors_to_bbox(u: torch.Tensor, v: torch.Tensor, labels: torch.
     x, y, w, h = box_xywh_abs
     w = max(int(w), 1)
     h = max(int(h), 1)
-    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
-    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
+    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False).float()
+    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False).float()
     uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
     for part_id in range(1, u_bbox.size(1)):
         uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]