Commit ecedc32: Optimize 'Camera' code
Parent: 78ab865

8 files changed: +197, -173 lines

README.md  (+20, -18)

@@ -91,19 +91,24 @@ Step-by-step:

 ```shell
 $ cd ${HOME}/project/tensorrt_demos
-$ python3 trt_googlenet.py --usb --vid 0 --width 1280 --height 720
+$ python3 trt_googlenet.py --usb 0 --width 1280 --height 720
 ```

 Here's a screenshot of the demo (JetPack-4.2.2, i.e. TensorRT 5).

 ![A picture of a golden retriever](https://raw.githubusercontent.com/jkjung-avt/tensorrt_demos/master/doc/golden_retriever.png)

-5. The demo program supports a number of different image inputs. You could do `python3 trt_googlenet.py --help` to read the help messages. Or more specifically, the following inputs could be specified:
+5. The demo program supports 5 different image/video inputs. You could do `python3 trt_googlenet.py --help` to read the help messages. Or more specifically, the following inputs could be specified:

-   * `--file --filename test_video.mp4`: a video file, e.g. mp4 or ts.
-   * `--image --filename test_image.jpg`: an image file, e.g. jpg or png.
-   * `--usb --vid 0`: USB webcam (/dev/video0).
-   * `--rtsp --uri rtsp://admin:[email protected]/live.sdp`: RTSP source, e.g. an IP cam.
+   * `--image test_image.jpg`: an image file, e.g. jpg or png.
+   * `--video test_video.mp4`: a video file, e.g. mp4 or ts. An optional `--video_looping` flag could be enabled if needed.
+   * `--usb 0`: USB webcam (/dev/video0).
+   * `--rtsp rtsp://admin:[email protected]/live.sdp`: RTSP source, e.g. an IP cam. An optional `--rtsp_latency` argument could be used to adjust the latency setting in this case.
+   * `--onboard 0`: Jetson onboard camera.
+
+   In addition, you could use `--width` and `--height` to specify the desired input image size, and use `--do_resize` to force resizing of the image/video file source.
+
+   The `--usb`, `--rtsp` and `--onboard` video sources usually produce image frames at 30 FPS. If the TensorRT engine inference code runs faster than that (which happens easily on an x86_64 PC with a good GPU), one particular image could be inferenced multiple times before the next image frame becomes available. This causes problems in the object detector demos, since the original image could have been altered (bounding boxes drawn) and the altered image is then taken for inference again. To cope with this problem, use the optional `--copy_frame` flag to force copying/cloning of image frames internally.

 6. Check out my blog post for implementation details:

@@ -131,7 +136,7 @@ Assuming this repository has been cloned at "${HOME}/project/tensorrt_demos", fo

 ```shell
 $ cd ${HOME}/project/tensorrt_demos
-$ python3 trt_mtcnn.py --image --filename ${HOME}/Pictures/avengers.jpg
+$ python3 trt_mtcnn.py --image ${HOME}/Pictures/avengers.jpg
 ```

 Here's the result (JetPack-4.2.2, i.e. TensorRT 5).

@@ -169,9 +174,8 @@ Assuming this repository has been cloned at "${HOME}/project/tensorrt_demos", fo

 ```shell
 $ cd ${HOME}/project/tensorrt_demos
-$ python3 trt_ssd.py --model ssd_mobilenet_v1_coco \
-                     --image \
-                     --filename ${HOME}/project/tf_trt_models/examples/detection/data/huskies.jpg
+$ python3 trt_ssd.py --image ${HOME}/project/tf_trt_models/examples/detection/data/huskies.jpg \
+                     --model ssd_mobilenet_v1_coco
 ```

 Here's the result (JetPack-4.2.2, i.e. TensorRT 5). Frame rate was good (over 20 FPS).

@@ -187,9 +191,8 @@ Assuming this repository has been cloned at "${HOME}/project/tensorrt_demos", fo

 I also tested the "ssd_mobilenet_v1_egohands" (hand detector) model with a video clip from YouTube, and got the following result. Again, frame rate was pretty good. But the detection didn't seem very accurate :-(

 ```shell
-$ python3 trt_ssd.py --model ssd_mobilenet_v1_egohands \
-                     --file \
-                     --filename ${HOME}/Videos/Nonverbal_Communication.mp4
+$ python3 trt_ssd.py --video ${HOME}/Videos/Nonverbal_Communication.mp4 \
+                     --model ssd_mobilenet_v1_egohands
 ```

 (Click on the image below to see the whole video clip...)

@@ -202,9 +205,8 @@ Assuming this repository has been cloned at "${HOME}/project/tensorrt_demos", fo

 ```shell
 $ cd ${HOME}/project/tensorrt_demos
-$ python3 trt_ssd_async.py --model ssd_mobilenet_v1_coco \
-                           --image \
-                           --filename ${HOME}/project/tf_trt_models/examples/detection/data/huskies.jpg
+$ python3 trt_ssd_async.py --image ${HOME}/project/tf_trt_models/examples/detection/data/huskies.jpg \
+                           --model ssd_mobilenet_v1_coco
 ```

 5. To verify accuracy (mAP) of the optimized TensorRT engines and make sure they do not degrade too much (due to reduced floating-point precision of "FP16") from the original TensorFlow frozen inference graphs, you could prepare validation data and run "eval_ssd.py". Refer to [README_mAP.md](README_mAP.md) for details.

@@ -278,8 +280,8 @@ Assuming this repository has been cloned at "${HOME}/project/tensorrt_demos", fo

 ```shell
 $ wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/dog.jpg -O ${HOME}/Pictures/dog.jpg
-$ python3 trt_yolo.py -m yolov4-416 \
-                      --image --filename ${HOME}/Pictures/dog.jpg
+$ python3 trt_yolo.py --image ${HOME}/Pictures/dog.jpg \
+                      -m yolov4-416
 ```

 This is a screenshot of the demo against JetPack-4.4, i.e. TensorRT 7.
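
The `--copy_frame` behavior described in the README text above is easy to picture with a small sketch. This is illustrative only and not code from the repository; the class and attribute names (`FrameSource`, `copy_frame`, `_latest`) are made up for the example.

```python
import numpy as np

class FrameSource:
    """Hand out the latest grabbed frame, optionally as a copy."""

    def __init__(self, copy_frame=False):
        self.copy_frame = copy_frame
        self._latest = None  # most recent frame grabbed by the camera thread

    def update(self, frame):
        self._latest = frame

    def read(self):
        if self._latest is None:
            return None
        # If the consumer runs faster than the ~30 FPS camera, it may read
        # the same frame twice.  Returning a copy keeps the buffered frame
        # pristine, so bounding boxes drawn by the detector on one read do
        # not show up again on the next read.
        return np.copy(self._latest) if self.copy_frame else self._latest
```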

trt_googlenet.py  (+11, -12)

@@ -88,12 +88,13 @@ def loop_and_classify(cam, net, labels, do_cropping):
         if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
             break
         img = cam.read()
-        if img is not None:
-            top_probs, top_labels = classify(img, net, labels, do_cropping)
-            show_top_preds(img, top_probs, top_labels)
-            if show_help:
-                show_help_text(img, help_text)
-            cv2.imshow(WINDOW_NAME, img)
+        if img is None:
+            break
+        top_probs, top_labels = classify(img, net, labels, do_cropping)
+        show_top_preds(img, top_probs, top_labels)
+        if show_help:
+            show_help_text(img, help_text)
+        cv2.imshow(WINDOW_NAME, img)
         key = cv2.waitKey(1)
         if key == 27:  # ESC key: quit program
             break

@@ -108,19 +109,17 @@ def main():
     args = parse_args()
     labels = np.loadtxt('googlenet/synset_words.txt', str, delimiter='\t')
     cam = Camera(args)
-    cam.open()
-    if not cam.is_opened:
+    if not cam.isOpened():
         raise SystemExit('ERROR: failed to open camera!')

     # initialize the tensorrt googlenet engine
     net = PyTrtGooglenet(DEPLOY_ENGINE, ENGINE_SHAPE0, ENGINE_SHAPE1)

-    cam.start()
-    open_window(WINDOW_NAME, args.image_width, args.image_height,
-                'Camera TensorRT GoogLeNet Demo for Jetson Nano')
+    open_window(
+        WINDOW_NAME, 'Camera TensorRT GoogLeNet Demo',
+        cam.img_width, cam.img_height)
     loop_and_classify(cam, net, labels, args.crop_center)

-    cam.stop()
     cam.release()
     cv2.destroyAllWindows()
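
The call sites in this commit (`Camera(args)`, `cam.isOpened()`, `cam.img_width`/`cam.img_height`, `cam.read()`, `cam.release()`) imply a streamlined camera wrapper whose constructor now opens the device and starts grabbing frames, replacing the old `open()`/`start()`/`stop()` sequence. The camera module itself is not shown in this commit view, so the following is only a minimal sketch of that implied interface, restricted to the USB case and with all argument names (`args.usb`, `args.width`, `args.height`) assumed.

```python
import cv2

class Camera:
    """Minimal sketch of the interface the updated demos rely on (assumed)."""

    def __init__(self, args):
        # Only the '--usb N' (/dev/videoN) case is sketched here; the real
        # class also handles image/video files, RTSP and the onboard camera.
        self.cap = cv2.VideoCapture(args.usb)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
        self.img_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.img_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    def isOpened(self):
        return self.cap.isOpened()

    def read(self):
        # Returning None at end of stream lets callers 'break' cleanly,
        # which matches the 'if img is None: break' checks added above.
        ok, frame = self.cap.read()
        return frame if ok else None

    def release(self):
        self.cap.release()
```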

trt_googlenet_async.py  (+7, -7)

@@ -26,7 +26,7 @@
 RESIZED_SHAPE = (224, 224)

 WINDOW_NAME = 'TrtGooglenetDemo'
-MAIN_THREAD_TIMEOUT = 30.0  # 30 seconds
+MAIN_THREAD_TIMEOUT = 10.0  # 10 seconds

 # 'shared' global variables
 s_img, s_probs, s_labels = None, None, None

@@ -99,6 +99,8 @@ def run(self):
         self.running = True
         while self.running:
             img = self.cam.read()
+            if img is None:
+                break
             top_probs, top_labels = classify(
                 img, self.net, self.labels, self.do_cropping)
             with self.condition:

@@ -162,20 +164,18 @@ def main():
     args = parse_args()
     labels = np.loadtxt('googlenet/synset_words.txt', str, delimiter='\t')
     cam = Camera(args)
-    cam.open()
-    if not cam.is_opened:
+    if not cam.isOpened():
         raise SystemExit('ERROR: failed to open camera!')

-    cam.start()  # let camera start grabbing frames
-    open_window(WINDOW_NAME, args.image_width, args.image_height,
-                'Camera TensorRT GoogLeNet Demo for Jetson Nano')
+    open_window(
+        WINDOW_NAME, 'Camera TensorRT GoogLeNet Demo',
+        cam.img_width, cam.img_height)
     condition = threading.Condition()
     trt_thread = TrtGooglenetThread(condition, cam, labels, args.crop_center)
     trt_thread.start()  # start the child thread
     loop_and_display(condition)
     trt_thread.stop()  # stop the child thread

-    cam.stop()
     cam.release()
     cv2.destroyAllWindows()
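
Both async demos use the same handoff pattern: the child thread runs inference and publishes its latest result under a `threading.Condition`, while the main thread (which owns the OpenCV window) waits on that condition for at most `MAIN_THREAD_TIMEOUT` seconds and displays whatever arrives. Below is a stripped-down, self-contained sketch of that pattern; the worker, the published "result" and the timeout value are placeholders, not code from the repository.

```python
import threading
import time

MAIN_THREAD_TIMEOUT = 10.0  # give up if no result arrives within 10 seconds

condition = threading.Condition()
latest = None          # 'shared' result, protected by the condition
running = True

def inference_thread():
    """Producer: pretend to run inference and publish each result."""
    global latest
    frame_id = 0
    while running:
        time.sleep(0.03)           # stand-in for detect()/classify()
        with condition:
            latest = frame_id      # publish the newest result
            condition.notify()     # wake up the display loop
        frame_id += 1

def display_loop(num_frames=5):
    """Consumer: wait for each new result and 'display' it."""
    for _ in range(num_frames):
        with condition:
            if condition.wait(timeout=MAIN_THREAD_TIMEOUT):
                print('displaying result', latest)
            else:
                raise SystemExit('ERROR: timeout waiting for a result!')

t = threading.Thread(target=inference_thread)
t.start()
display_loop()
running = False
t.join()
```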

trt_ssd.py  (+15, -16)

@@ -58,16 +58,17 @@ def loop_and_detect(cam, trt_ssd, conf_th, vis):
         if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
             break
         img = cam.read()
-        if img is not None:
-            boxes, confs, clss = trt_ssd.detect(img, conf_th)
-            img = vis.draw_bboxes(img, boxes, confs, clss)
-            img = show_fps(img, fps)
-            cv2.imshow(WINDOW_NAME, img)
-            toc = time.time()
-            curr_fps = 1.0 / (toc - tic)
-            # calculate an exponentially decaying average of fps number
-            fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
-            tic = toc
+        if img is None:
+            break
+        boxes, confs, clss = trt_ssd.detect(img, conf_th)
+        img = vis.draw_bboxes(img, boxes, confs, clss)
+        img = show_fps(img, fps)
+        cv2.imshow(WINDOW_NAME, img)
+        toc = time.time()
+        curr_fps = 1.0 / (toc - tic)
+        # calculate an exponentially decaying average of fps number
+        fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
+        tic = toc
         key = cv2.waitKey(1)
         if key == 27:  # ESC key: quit program
             break

@@ -79,20 +80,18 @@ def loop_and_detect(cam, trt_ssd, conf_th, vis):
 def main():
     args = parse_args()
     cam = Camera(args)
-    cam.open()
-    if not cam.is_opened:
+    if not cam.isOpened():
         raise SystemExit('ERROR: failed to open camera!')

     cls_dict = get_cls_dict(args.model.split('_')[-1])
     trt_ssd = TrtSSD(args.model, INPUT_HW)

-    cam.start()
-    open_window(WINDOW_NAME, args.image_width, args.image_height,
-                'Camera TensorRT SSD Demo for Jetson Nano')
+    open_window(
+        WINDOW_NAME, 'Camera TensorRT SSD Demo',
+        cam.img_width, cam.img_height)
     vis = BBoxVisualization(cls_dict)
     loop_and_detect(cam, trt_ssd, conf_th=0.3, vis=vis)

-    cam.stop()
     cam.release()
     cv2.destroyAllWindows()
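
The refactored loop keeps the existing frame-rate estimate: each new instantaneous FPS reading is blended into the running value with weights 0.95/0.05, i.e. an exponentially decaying average that smooths per-frame jitter. A tiny standalone illustration of that update rule (the sample readings are made up):

```python
def update_fps(fps, curr_fps):
    # the first measurement seeds the average; afterwards old readings decay
    # geometrically (weight 0.95) while the newest one contributes 5%
    return curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)

fps = 0.0
for curr in [28.0, 32.0, 30.0, 31.0]:  # illustrative per-frame FPS readings
    fps = update_fps(fps, curr)
print('smoothed FPS: %.2f' % fps)      # prints 28.43 for these four samples
```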

trt_ssd_async.py  (+9, -9)

@@ -24,7 +24,7 @@


 WINDOW_NAME = 'TrtSsdDemoAsync'
-MAIN_THREAD_TIMEOUT = 30.0  # 30 seconds
+MAIN_THREAD_TIMEOUT = 20.0  # 20 seconds
 INPUT_HW = (300, 300)
 SUPPORTED_MODELS = [
     'ssd_mobilenet_v1_coco',

@@ -98,6 +98,8 @@ def run(self):
         self.running = True
         while self.running:
             img = self.cam.read()
+            if img is None:
+                break
             boxes, confs, clss = self.trt_ssd.detect(img, self.conf_th)
             with self.condition:
                 s_img, s_boxes, s_confs, s_clss = img, boxes, confs, clss

@@ -156,25 +158,23 @@ def loop_and_display(condition, vis):
 def main():
     args = parse_args()
     cam = Camera(args)
-    cam.open()
-    if not cam.is_opened:
+    if not cam.isOpened():
         raise SystemExit('ERROR: failed to open camera!')

-    cls_dict = get_cls_dict(args.model.split('_')[-1])
-
     cuda.init()  # init pycuda driver

-    cam.start()  # let camera start grabbing frames
-    open_window(WINDOW_NAME, args.image_width, args.image_height,
-                'Camera TensorRT SSD Demo for Jetson Nano')
+    cls_dict = get_cls_dict(args.model.split('_')[-1])
+
+    open_window(
+        WINDOW_NAME, 'Camera TensorRT SSD Demo',
+        cam.img_width, cam.img_height)
     vis = BBoxVisualization(cls_dict)
     condition = threading.Condition()
     trt_thread = TrtThread(condition, cam, args.model, conf_th=0.3)
     trt_thread.start()  # start the child thread
     loop_and_display(condition, vis)
     trt_thread.stop()  # stop the child thread

-    cam.stop()
     cam.release()
     cv2.destroyAllWindows()

trt_yolo.py  (+15, -16)

@@ -58,16 +58,17 @@ def loop_and_detect(cam, trt_yolo, conf_th, vis):
         if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
             break
         img = cam.read()
-        if img is not None:
-            boxes, confs, clss = trt_yolo.detect(img, conf_th)
-            img = vis.draw_bboxes(img, boxes, confs, clss)
-            img = show_fps(img, fps)
-            cv2.imshow(WINDOW_NAME, img)
-            toc = time.time()
-            curr_fps = 1.0 / (toc - tic)
-            # calculate an exponentially decaying average of fps number
-            fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
-            tic = toc
+        if img is None:
+            break
+        boxes, confs, clss = trt_yolo.detect(img, conf_th)
+        img = vis.draw_bboxes(img, boxes, confs, clss)
+        img = show_fps(img, fps)
+        cv2.imshow(WINDOW_NAME, img)
+        toc = time.time()
+        curr_fps = 1.0 / (toc - tic)
+        # calculate an exponentially decaying average of fps number
+        fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
+        tic = toc
         key = cv2.waitKey(1)
         if key == 27:  # ESC key: quit program
             break

@@ -84,8 +85,7 @@ def main():
         raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

     cam = Camera(args)
-    cam.open()
-    if not cam.is_opened:
+    if not cam.isOpened():
         raise SystemExit('ERROR: failed to open camera!')

     cls_dict = get_cls_dict(args.category_num)

@@ -102,13 +102,12 @@ def main():

     trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

-    cam.start()
-    open_window(WINDOW_NAME, args.image_width, args.image_height,
-                'Camera TensorRT YOLO Demo')
+    open_window(
+        WINDOW_NAME, 'Camera TensorRT YOLO Demo',
+        cam.img_width, cam.img_height)
     vis = BBoxVisualization(cls_dict)
     loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

-    cam.stop()
     cam.release()
     cv2.destroyAllWindows()
