Commit 3faffbf: Wrap up project

dajes committed Dec 24, 2023 · 1 parent 21361b6
Showing 5 changed files with 272 additions and 17 deletions.
225 changes: 225 additions & 0 deletions README.md
@@ -0,0 +1,225 @@
<a href="https://savelife.in.ua/en/donate-en/"><img src="https://savelife.in.ua/wp-content/themes/savelife/assets/images/new-logo-en.svg" width=120px></a>
# Exportable DensePose inference using TorchScript

### An unofficial inference implementation of [DensePose from detectron2](https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose)

The project focuses on providing a simple, TorchScript-compilable inference interface for the original pretrained
models, freeing them from the heavy dependency on the detectron2 framework.

#### Only inference is supported, not training. Confidence estimation and bootstrapping pipelines are likewise not implemented.

# Quickstart
To run an already exported model (available in the
[Releases](https://github.com/dajes/DensePose-TorchScript/releases) section) you only need PyTorch and OpenCV
(for image reading):

```
pip install torch torchvision opencv-python
```

Then you can run the model using the small example script:

```
python run.py <model.pt> <input.[jpg|png|mp4|avi]>
```
This will run the model and save the result in the same directory as the input.
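
If you would rather call the model from your own code than through `run.py`, the whole inference loop fits in a few lines. A minimal sketch mirroring what `run.py` does, assuming the repository's `visualizer.py` is importable and using placeholder file names:

```
import cv2
import torch

from visualizer import End2EndVisualizer

# Load the exported TorchScript module; detectron2 is not needed at this point.
predictor = torch.jit.load("model.pt").eval()
visualizer = End2EndVisualizer(alpha=0.7, keep_bg=False)

# The predictor consumes an HWC uint8 BGR tensor, i.e. an OpenCV image as-is.
img = cv2.imread("input.jpg")
outputs = predictor(torch.from_numpy(img))

# Blend the DensePose result over the image and save it next to the input.
cv2.imwrite("input_pred.jpg", visualizer.visualize(img, outputs))
```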


## Exporting a model yourself

To export a model you need a model checkpoint and a config file; you can find both in the tables below.

```
python export.py <config> <model> [--fp16]
```

If `--fp16` is specified, the model is exported in fp16 mode, reducing the model size at the cost of some
precision.
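
An fp16 export still needs to be cast back to fp32 when run on CPU, since half-precision kernels are generally only available on GPU. A sketch of the load-time handling, mirroring the logic in `run.py` (the model path is a placeholder):

```
import torch

predictor = torch.jit.load("model_fp16.pt").eval()
if torch.cuda.is_available():
    # Half-precision weights can stay as they are on GPU.
    predictor = predictor.cuda().half()
else:
    # On CPU, cast the weights back to fp32 before running.
    predictor = predictor.float()
```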

Example of exporting the R_50_FPN_s1x_legacy model in fp16:

```
python export.py configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl --fp16
```

### License

All models available for download are licensed under the
[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/).

### Legacy Models

Baselines trained using schedules from [Güler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf).

<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">segm<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_s1x_legacy -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml">R_50_FPN_s1x_legacy</a></td>
<td align="center">s1x</td>
<td align="center">0.307</td>
<td align="center">0.051</td>
<td align="center">3.2</td>
<td align="center">58.1</td>
<td align="center">58.2</td>
<td align="center">52.1</td>
<td align="center">54.9</td>
<td align="center">164832157</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_s1x_legacy -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml">R_101_FPN_s1x_legacy</a></td>
<td align="center">s1x</td>
<td align="center">0.390</td>
<td align="center">0.063</td>
<td align="center">4.3</td>
<td align="center">59.5</td>
<td align="center">59.3</td>
<td align="center">53.2</td>
<td align="center">56.0</td>
<td align="center">164832182</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/model_final_10af0e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/metrics.json">metrics</a></td>
</tr>
</tbody></table>

```
python export.py configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl
```

```
python export.py configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/model_final_10af0e.pkl
```

### Improved Baselines, Original Fully Convolutional Head

These models use an improved training schedule and Panoptic FPN head
from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446).

<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">segm<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x.yaml">R_50_FPN_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.359</td>
<td align="center">0.066</td>
<td align="center">4.5</td>
<td align="center">61.2</td>
<td align="center">67.2</td>
<td align="center">63.7</td>
<td align="center">65.3</td>
<td align="center">165712039</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x.yaml">R_101_FPN_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.428</td>
<td align="center">0.079</td>
<td align="center">5.8</td>
<td align="center">62.3</td>
<td align="center">67.8</td>
<td align="center">64.5</td>
<td align="center">66.2</td>
<td align="center">165712084</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/model_final_c6ab63.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/metrics.json">metrics</a></td>
</tr>
</tbody></table>

```
python export.py configs/densepose_rcnn_R_50_FPN_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl
```

```
python export.py configs/densepose_rcnn_R_101_FPN_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/model_final_c6ab63.pkl
```

### <a name="ModelZooDeepLabV3"></a> Improved Baselines, DeepLabV3 Head

These models use an improved training schedule, Panoptic FPN head
from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head
from [Chen et al, 2017](https://arxiv.org/abs/1706.05587).

<table><tbody>
<!-- START TABLE -->
<!-- TABLE HEADER -->
<th valign="bottom">Name</th>
<th valign="bottom">lr<br/>sched</th>
<th valign="bottom">train<br/>time<br/>(s/iter)</th>
<th valign="bottom">inference<br/>time<br/>(s/im)</th>
<th valign="bottom">train<br/>mem<br/>(GB)</th>
<th valign="bottom">box<br/>AP</th>
<th valign="bottom">segm<br/>AP</th>
<th valign="bottom">dp. AP<br/>GPS</th>
<th valign="bottom">dp. AP<br/>GPSm</th>
<th valign="bottom">model id</th>
<th valign="bottom">download</th>
<!-- TABLE BODY -->
<!-- ROW: densepose_rcnn_R_50_FPN_DL_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml">R_50_FPN_DL_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.392</td>
<td align="center">0.070</td>
<td align="center">6.7</td>
<td align="center">61.1</td>
<td align="center">68.3</td>
<td align="center">65.6</td>
<td align="center">66.7</td>
<td align="center">165712097</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/model_final_0ed407.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/metrics.json">metrics</a></td>
</tr>
<!-- ROW: densepose_rcnn_R_101_FPN_DL_s1x -->
<tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml">R_101_FPN_DL_s1x</a></td>
<td align="center">s1x</td>
<td align="center">0.478</td>
<td align="center">0.083</td>
<td align="center">7.0</td>
<td align="center">62.3</td>
<td align="center">68.7</td>
<td align="center">66.3</td>
<td align="center">67.6</td>
<td align="center">165712116</td>
<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/model_final_844d15.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/metrics.json">metrics</a></td>
</tr>
</tbody></table>

```
python export.py configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/model_final_0ed407.pkl
```

```
python export.py configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/model_final_844d15.pkl
```

### Citation

```
@InProceedings{Guler2018DensePose,
  title={DensePose: Dense Human Pose Estimation In The Wild},
  author={R{\i}za Alp G{\"u}ler and Natalia Neverova and Iasonas Kokkinos},
  journal={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2018}
}
```
3 changes: 1 addition & 2 deletions detectron2/modeling/postprocessing.py
```
@@ -47,12 +47,11 @@ def detector_postprocess(
 
     output_boxes = results['pred_boxes']
     scale_boxes(output_boxes, scale_x, scale_y)
-    clip_boxes(output_boxes, results['image_size'])
 
     keep = nonempty_boxes(output_boxes)
     return {
         'image_size': new_size,
-        'pred_boxes': output_boxes[keep],
+        'pred_boxes': clip_boxes(output_boxes[keep], new_size),
         'scores': results['scores'][keep],
         'pred_classes': results['pred_classes'][keep],
         'pred_densepose_coarse_segm': results['pred_densepose_coarse_segm'][keep],
```
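
For readers without the repository at hand: `clip_boxes` clamps `(x1, y1, x2, y2)` box coordinates into the image rectangle. After this change it is applied to the kept boxes against the rescaled output size `new_size`, instead of in place against the pre-resize `image_size`. A minimal sketch of the clamping itself, with the signature inferred from the call sites above:

```
import torch
from typing import Tuple

def clip_boxes(boxes: torch.Tensor, size: Tuple[int, int]) -> torch.Tensor:
    # Clamp (x1, y1, x2, y2) boxes, one per row, into a (height, width) image.
    h, w = size
    x1 = boxes[:, 0].clamp(min=0, max=w)
    y1 = boxes[:, 1].clamp(min=0, max=h)
    x2 = boxes[:, 2].clamp(min=0, max=w)
    y2 = boxes[:, 3].clamp(min=0, max=h)
    return torch.stack((x1, y1, x2, y2), dim=1)
```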
2 changes: 1 addition & 1 deletion export.py
```
@@ -12,7 +12,7 @@ def main():
     parser = argparse.ArgumentParser(description='Export DensePose model to TorchScript module')
     parser.add_argument("cfg", type=str, help="Config file")
     parser.add_argument("model", type=str, help="Model file")
-    parser.add_argument("--min_score", default=0.8, type=float,
+    parser.add_argument("--min_score", default=0.3, type=float,
                         help="Minimum detection score to visualize")
     parser.add_argument("--nms_thresh", metavar="<threshold>", default=None, type=float,
                         help="NMS threshold")
```
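
The default `--min_score` drops from 0.8 to 0.3, so newly exported models keep far more low-confidence detections. If you prefer the old, stricter behaviour, pass the threshold explicitly at export time (paths are placeholders):

```
python export.py configs/densepose_rcnn_R_50_FPN_s1x.yaml model_final_162be9.pkl --min_score 0.8
```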
55 changes: 43 additions & 12 deletions run.py
```
@@ -1,5 +1,6 @@
 import argparse
 import os
+from itertools import count
 
 import cv2
 import torch
@@ -11,23 +12,53 @@
 parser.add_argument("model", type=str, help="Model file")
 parser.add_argument("input", type=str, help="Input data")
 parser.add_argument("--cpu", action="store_true", help="Only use CPU")
+parser.add_argument("--fp32", action="store_true", help="Only use FP32")
 args = parser.parse_args()
-file_list = [args.input]
-img = cv2.imread(args.input)
-tensor = torch.from_numpy(img)
 
-visualizer = End2EndVisualizer(alpha=1.0, keep_bg=False)
-predictor = torch.jit.load(args.model)
+visualizer = End2EndVisualizer(alpha=.7, keep_bg=False)
+predictor = torch.jit.load(args.model).eval()
 
 if torch.cuda.is_available() and not args.cpu:
-    tensor = tensor.cuda()
+    device = torch.device("cuda")
     predictor = predictor.cuda()
+    if args.fp32:
+        predictor = predictor.float()
+    else:
+        predictor = predictor.half()
+else:
+    device = torch.device("cpu")
+    predictor = predictor.float()
 
-outputs = predictor(tensor)
-image_vis = visualizer.visualize(img, outputs)
-
 save_path = "_pred".join(os.path.splitext(args.input))
-cv2.imwrite(save_path, image_vis)
-print(f"Image saved to {save_path}")
+if os.path.splitext(args.input)[1].lower() in [".jpg", ".png", ".jpeg", ".bmp", ".tif", ".tiff"]:
+    img = cv2.imread(args.input)
+    tensor = torch.from_numpy(img)
+
+    outputs = predictor(tensor)
+    image_vis = visualizer.visualize(img, outputs)
+
+    cv2.imwrite(save_path, image_vis)
+    print(f"Image saved to {save_path}")
+else:
+    cap = cv2.VideoCapture(args.input)
+    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    writer = None
+    try:
+        for i in count():
+            ret, frame = cap.read()
+            if not ret:
+                break
+            tensor = torch.from_numpy(frame)
+            outputs = predictor(tensor)
+            image_vis = visualizer.visualize(frame, outputs)
+            if writer is None:
+                writer = cv2.VideoWriter(
+                    save_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (image_vis.shape[1], image_vis.shape[0]))
+            writer.write(image_vis)
+            print(f"Frame {i + 1}/{n_frames} processed", end="\r")
+    except KeyboardInterrupt:
+        pass
+    if writer is not None:
+        writer.release()
+        print(f"Video saved to {save_path}")
+    else:
+        print("No frames processed")
```
4 changes: 2 additions & 2 deletions visualizer.py
```
@@ -21,8 +21,8 @@ def resample_uv_tensors_to_bbox(u: torch.Tensor, v: torch.Tensor, labels: torch.
     x, y, w, h = box_xywh_abs
     w = max(int(w), 1)
     h = max(int(h), 1)
-    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
-    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
+    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False).float()
+    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False).float()
     uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
     for part_id in range(1, u_bbox.size(1)):
         uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]
```
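
The added `.float()` casts matter for fp16 exports: `F.interpolate` then returns half tensors, and the masked writes into the float32 `uv` buffer require matching dtypes. A minimal reproduction of the error the casts avoid (shapes are illustrative):

```
import torch

uv = torch.zeros(2, 4, 4, dtype=torch.float32)
labels = torch.randint(0, 3, (4, 4))
u_bbox = torch.rand(1, 3, 4, 4, dtype=torch.float16)  # as if interpolated from an fp16 model

# Without the cast this raises:
#   RuntimeError: Index put requires the source and destination dtypes match ...
uv[0][labels == 1] = u_bbox[0, 1][labels == 1].float()
```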
