Panoptic segmentation: download pretrained weights in ROS node (opendr-eu#269)

vniclas · web-flow · commit f59deb6f2a07 · 2022-09-05T17:05:24.000+02:00
* Download pre-trained model weights

* Add backward compatibility

* Do not remove downloaded checkpoint file

* Resolve PEP8 issue

* Consistent with ROS2

* Address review

* Reduce queue size to 1

* Set default for input rgb image topic
diff --git a/projects/opendr_ws/src/perception/README.md b/projects/opendr_ws/src/perception/README.md
@@ -164,15 +164,16 @@ rosrun perception object_detection_2d_gem.py
 A ROS node for performing panoptic segmentation on a specified RGB image stream using the [EfficientPS](../../../../src/opendr/perception/panoptic_segmentation/README.md) network.
 Assuming that the OpenDR catkin workspace has been sourced, the node can be started with:
 ```shell
-rosrun perception panoptic_segmentation_efficient_ps.py CHECKPOINT IMAGE_TOPIC
+rosrun perception panoptic_segmentation_efficient_ps.py
 ```
-with `CHECKPOINT` pointing to the path to the trained model weights and `IMAGE_TOPIC` specifying the ROS topic, to which the node will subscribe.
 
-Additionally, the following optional arguments are available:
+The following optional arguments are available:
 - `-h, --help`: show a help message and exit
-- `--heamap_topic HEATMAP_TOPIC`: publish the semantic and instance maps on `HEATMAP_TOPIC`
-- `--visualization_topic VISUALIZATION_TOPIC`: publish the panoptic segmentation map as an RGB image on `VISUALIZATION_TOPIC` or a more detailed overview if using the `--detailed_visualization` flag
-- `--detailed_visualization`: generate a combined overview of the input RGB image and the semantic, instance, and panoptic segmentation maps
+- `--input_rgb_image_topic INPUT_RGB_IMAGE_TOPIC` : listen to RGB images on this topic (default=`/usb_cam/image_raw`)
+- `--checkpoint CHECKPOINT` : download pretrained models [cityscapes, kitti] or load from the provided path (default=`cityscapes`)
+- `--output_heatmap_topic OUTPUT_HEATMAP_TOPIC`: publish the semantic and instance maps on this topic as `OUTPUT_HEATMAP_TOPIC/semantic` and `OUTPUT_HEATMAP_TOPIC/instance` (default=`/opendr/panoptic`)
+- `--output_rgb_image_topic OUTPUT_RGB_IMAGE_TOPIC`: publish the panoptic segmentation map as an RGB image on `OUTPUT_RGB_IMAGE_TOPIC` or a more detailed overview if using the `--detailed_visualization` flag (default=`/opendr/panoptic/rgb_visualization`)
+- `--detailed_visualization`: generate a combined overview of the input RGB image and the semantic, instance, and panoptic segmentation maps and publish it on `OUTPUT_RGB_IMAGE_TOPIC` (default=deactivated)
 
 
 ## Semantic Segmentation ROS Node
diff --git a/projects/opendr_ws/src/perception/scripts/panoptic_segmentation_efficient_ps.py b/projects/opendr_ws/src/perception/scripts/panoptic_segmentation_efficient_ps.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import sys
+from pathlib import Path
 import argparse
 from typing import Optional
 
@@ -29,27 +31,31 @@
 
 class EfficientPsNode:
     def __init__(self,
+                 input_rgb_image_topic: str,
                  checkpoint: str,
-                 input_image_topic: str,
                  output_heatmap_topic: Optional[str] = None,
-                 output_visualization_topic: Optional[str] = None,
+                 output_rgb_visualization_topic: Optional[str] = None,
                  detailed_visualization: bool = False
                  ):
         """
         Initialize the EfficientPS ROS node and create an instance of the respective learner class.
-        :param checkpoint: Path to a saved model
+        :param checkpoint: This is either a path to a saved model or one of [cityscapes, kitti] to download
+            pre-trained model weights.
         :type checkpoint: str
-        :param input_image_topic: ROS topic for the input image stream
-        :type input_image_topic: str
+        :param input_rgb_image_topic: ROS topic for the input image stream
+        :type input_rgb_image_topic: str
         :param output_heatmap_topic: ROS topic for the predicted semantic and instance maps
         :type output_heatmap_topic: str
-        :param output_visualization_topic: ROS topic for the generated visualization of the panoptic map
-        :type output_visualization_topic: str
+        :param output_rgb_visualization_topic: ROS topic for the generated visualization of the panoptic map
+        :type output_rgb_visualization_topic: str
+        :param detailed_visualization: if True, generate a combined overview of the input RGB image and the
+            semantic, instance, and panoptic segmentation maps and publish it on output_rgb_visualization_topic
+        :type detailed_visualization: bool
         """
+        self.input_rgb_image_topic = input_rgb_image_topic
         self.checkpoint = checkpoint
-        self.input_image_topic = input_image_topic
         self.output_heatmap_topic = output_heatmap_topic
-        self.output_visualization_topic = output_visualization_topic
+        self.output_rgb_visualization_topic = output_rgb_visualization_topic
         self.detailed_visualization = detailed_visualization
 
         # Initialize all ROS related things
@@ -59,14 +65,27 @@ def __init__(self,
         self._visualization_publisher = None
 
         # Initialize the panoptic segmentation network
-        self._learner = EfficientPsLearner()
+        config_file = Path(sys.modules[
+                               EfficientPsLearner.__module__].__file__).parent / 'configs' / 'singlegpu_cityscapes.py'
+        self._learner = EfficientPsLearner(str(config_file))
+
+        # Other
+        self._tmp_folder = Path(__file__).parent.parent / 'tmp' / 'efficientps'
+        self._tmp_folder.mkdir(exist_ok=True, parents=True)
 
     def _init_learner(self) -> bool:
         """
-        Load the weights from the specified checkpoint file.
+        The model can be initialized via
+        1. downloading pre-trained weights for Cityscapes or KITTI.
+        2. passing a path to an existing checkpoint file.
 
         This has not been done in the __init__() function since logging is available only once the node is registered.
         """
+        if self.checkpoint in ['cityscapes', 'kitti']:
+            file_path = EfficientPsLearner.download(str(self._tmp_folder),
+                                                    trained_on=self.checkpoint)
+            self.checkpoint = file_path
+
         if self._learner.load(self.checkpoint):
             rospy.loginfo('Successfully loaded the checkpoint.')
             return True
@@ -78,19 +97,20 @@ def _init_subscribers(self):
         """
         Subscribe to all relevant topics.
         """
-        rospy.Subscriber(self.input_image_topic, ROS_Image, self.callback)
+        rospy.Subscriber(self.input_rgb_image_topic, ROS_Image, self.callback, queue_size=1, buff_size=10000000)
 
     def _init_publisher(self):
         """
         Set up the publishers as requested by the user.
         """
         if self.output_heatmap_topic is not None:
-            self._instance_heatmap_publisher = rospy.Publisher(f'{self.output_heatmap_topic}/instance', ROS_Image,
-                                                               queue_size=10)
-            self._semantic_heatmap_publisher = rospy.Publisher(f'{self.output_heatmap_topic}/semantic', ROS_Image,
-                                                               queue_size=10)
-        if self.output_visualization_topic is not None:
-            self._visualization_publisher = rospy.Publisher(self.output_visualization_topic, ROS_Image, queue_size=10)
+            self._instance_heatmap_publisher = rospy.Publisher(
+                f'{self.output_heatmap_topic}/instance', ROS_Image, queue_size=10)
+            self._semantic_heatmap_publisher = rospy.Publisher(
+                f'{self.output_heatmap_topic}/semantic', ROS_Image, queue_size=10)
+        if self.output_rgb_visualization_topic is not None:
+            self._visualization_publisher = rospy.Publisher(self.output_rgb_visualization_topic,
+                                                            ROS_Image, queue_size=10)
 
     def listen(self):
         """
@@ -128,26 +148,31 @@ def callback(self, data: ROS_Image):
             if self._semantic_heatmap_publisher is not None and self._semantic_heatmap_publisher.get_num_connections() > 0:
                 self._semantic_heatmap_publisher.publish(self._bridge.to_ros_image(prediction[1]))
 
-        except Exception:
-            rospy.logwarn('Failed to generate prediction.')
+        except Exception as e:
+            rospy.logwarn(f'Failed to generate prediction: {e}')
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('checkpoint', type=str, help='load the model weights from the provided path')
-    parser.add_argument('image_topic', type=str, help='listen to images on this topic')
-    parser.add_argument('--heatmap_topic', type=str, help='publish the semantic and instance maps on this topic')
-    parser.add_argument('--visualization_topic', type=str,
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--input_rgb_image_topic', type=str, default='/usb_cam/image_raw',
+                        help='listen to RGB images on this topic')
+    parser.add_argument('--checkpoint', type=str, default='cityscapes',
+                        help='download pretrained models [cityscapes, kitti] or load from the provided path')
+    parser.add_argument('--output_heatmap_topic', type=str, default='/opendr/panoptic',
+                        help='publish the semantic and instance maps on this topic as "OUTPUT_HEATMAP_TOPIC/semantic" \
+                             and "OUTPUT_HEATMAP_TOPIC/instance"')
+    parser.add_argument('--output_rgb_image_topic', type=str,
+                        default='/opendr/panoptic/rgb_visualization',
                         help='publish the panoptic segmentation map as an RGB image on this topic or a more detailed \
                               overview if using the --detailed_visualization flag')
     parser.add_argument('--detailed_visualization', action='store_true',
                         help='generate a combined overview of the input RGB image and the semantic, instance, and \
-                              panoptic segmentation maps')
+                              panoptic segmentation maps and publish it on OUTPUT_RGB_IMAGE_TOPIC')
     args = parser.parse_args()
 
-    efficient_ps_node = EfficientPsNode(args.checkpoint,
-                                        args.image_topic,
-                                        args.heatmap_topic,
-                                        args.visualization_topic,
+    efficient_ps_node = EfficientPsNode(args.input_rgb_image_topic,
+                                        args.checkpoint,
+                                        args.output_heatmap_topic,
+                                        args.output_rgb_image_topic,
                                         args.detailed_visualization)
     efficient_ps_node.listen()
diff --git a/src/opendr/engine/target.py b/src/opendr/engine/target.py
@@ -1072,6 +1072,14 @@ def numpy(self):
         # Since this class stores the data as NumPy arrays, we can directly return the data.
         return self.data
 
+    def opencv(self):
+        """
+        Return the same data as numpy(); required to support the ROS bridge for images.
+        :return: a NumPy-compatible representation of data
+        :rtype: numpy.ndarray
+        """
+        return self.numpy()
+
     def shape(self) -> Tuple[int, ...]:
         """
         Returns the shape of the underlying NumPy array.
diff --git a/src/opendr/perception/panoptic_segmentation/efficient_ps/efficient_ps_learner.py b/src/opendr/perception/panoptic_segmentation/efficient_ps/efficient_ps_learner.py
@@ -306,17 +306,18 @@ def infer(self,
             warnings.warn('The current model has not been trained.')
         self.model.eval()
 
-        # Build the data pipeline
-        test_pipeline = Compose(self._cfg.test_pipeline[1:])
-        device = next(self.model.parameters()).device
-
-        # Convert to the format expected by the mmdetection API
         single_image_mode = False
         if isinstance(batch, Image):
             batch = [batch]
             single_image_mode = True
+
+        # Convert to the format expected by the mmdetection API
         mmdet_batch = []
+        device = next(self.model.parameters()).device
         for img in batch:
+            # Change the processing size according to the input image
+            self._cfg.test_pipeline[1:][0]['img_scale'] = batch[0].data.shape[1:]
+            test_pipeline = Compose(self._cfg.test_pipeline[1:])
             # Convert from OpenDR convention (CHW/RGB) to the expected format (HWC/BGR)
             img_ = img.convert('channels_last', 'bgr')
             mmdet_img = {'filename': None, 'img': img_, 'img_shape': img_.shape, 'ori_shape': img_.shape}
@@ -481,8 +482,12 @@ def update_to(b=1, bsize=1, total=None):
 
             return update_to
 
-        with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, desc=f'Downloading {filename}') as pbar:
-            urllib.request.urlretrieve(url, filename, pbar_hook(pbar))
+        if os.path.exists(filename) and os.path.isfile(filename):
+            print(f'File already downloaded: {filename}')
+        else:
+            with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, desc=f'Downloading {filename}') \
+                    as pbar:
+                urllib.request.urlretrieve(url, filename, pbar_hook(pbar))
         return filename
 
     @staticmethod