update smooth loss
Galaxies99 committed Sep 10, 2021
1 parent d5f9424 commit 4df4487
Showing 15 changed files with 545 additions and 204 deletions.
2 changes: 2 additions & 0 deletions configs/320x240/train_tg_val_tg.yaml
@@ -27,13 +27,15 @@
"rgb_augmentation_probability": 0.8
"depth_min": 0.0
"depth_max": 10.0
"depth_norm": 10.0
"test":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": False
"depth_min": 0.0
"depth_max": 10.0
"depth_norm": 10.0

"dataloader":
"num_workers": 48
61 changes: 61 additions & 0 deletions configs/320x240/train_tg_val_tg_add.yaml
@@ -0,0 +1,61 @@
# script id: 1
"model":
"type": "DFNet"
"params":
"in_channels": 4
"hidden_channels": 64
"L": 5
"k": 12

"optimizer":
"type": "AdamW"
"params":
"lr": 0.001

"lr_scheduler":
"type": "MultiStepLR"
"params":
"milestones": [5, 15, 25, 35]
"gamma": 0.2

"dataset":
"train":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": True
"rgb_augmentation_probability": 0.8
"depth_min": 0.0
"depth_max": 10.0
"depth_norm": 10.0
"test":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": False
"depth_min": 0.0
"depth_max": 10.0
"depth_norm": 10.0

"dataloader":
"num_workers": 48
"shuffle": True
"drop_last": True

"trainer":
"batch_size": 32
"test_batch_size": 1
"multigpu": True
"max_epoch": 40
"criterion":
"type": "custom_masked_mse_loss"
"epsilon": 0.00000001

"metrics":
"types": ["MSE", "MaskedMSE", "RMSE", "MaskedRMSE", "REL", "MaskedREL", "MAE", "MaskedMAE", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]"]
"epsilon": 0.00000001
"depth_scale": 10.0

"stats":
"stats_dir": "stats"
"stats_exper": "train-tg-val-tg-add"
63 changes: 63 additions & 0 deletions configs/320x240/train_tg_val_tg_combine.yaml
@@ -0,0 +1,63 @@
# script id: 1
"model":
"type": "DFNet"
"params":
"in_channels": 4
"hidden_channels": 64
"L": 5
"k": 12

"optimizer":
"type": "AdamW"
"params":
"lr": 0.001

"lr_scheduler":
"type": "MultiStepLR"
"params":
"milestones": [5, 15, 25, 35]
"gamma": 0.2

"dataset":
"train":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": True
"rgb_augmentation_probability": 0.8
"depth_min": 0.3
"depth_max": 1.5
"depth_norm": 1.0
"test":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": False
"depth_min": 0.3
"depth_max": 1.5
"depth_norm": 1.0

"dataloader":
"num_workers": 48
"shuffle": True
"drop_last": True

"trainer":
"batch_size": 32
"test_batch_size": 1
"multigpu": True
"max_epoch": 40
"criterion":
"type": "custom_masked_mse_loss"
"epsilon": 0.00000001
"combined_smooth": True
"combined_beta": 0.005

"metrics":
"types": ["MSE", "MaskedMSE", "RMSE", "MaskedRMSE", "REL", "MaskedREL", "MAE", "MaskedMAE", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]"]
"epsilon": 0.00000001
"depth_scale": 1.0

"stats":
"stats_dir": "stats"
"stats_exper": "train-tg-val-tg-comb"
2 changes: 1 addition & 1 deletion configs/inference.yaml
@@ -7,7 +7,7 @@
"k": 12

"inference":
"checkpoint_path": "stats/checkpoint.tar"
"checkpoint_path": "stats/train-tg-val-tg-comb/checkpoint.tar"
"image_size": !!python/tuple [320, 240]
"cuda_id": 0
"depth_min": 0.0
12 changes: 6 additions & 6 deletions datasets/transcg.py
@@ -33,9 +33,6 @@ def __init__(self, data_dir, split = 'train', **kwargs):
raise AttributeError('Invalid split option.')
self.data_dir = data_dir
self.split = split
self.high_resolution = kwargs.get('high_resolution', False)
if self.high_resolution and split == 'train':
raise AttributeError('Does not support returning high resolution images during training. If you want to train on high resolution samples, please set image_size arguments in high resolution.')
with open(os.path.join(self.data_dir, 'metadata.json'), 'r') as fp:
self.dataset_metadata = json.load(fp)
self.scene_num = self.dataset_metadata['total_scenes']
@@ -64,19 +61,22 @@ def __init__(self, data_dir, split = 'train', **kwargs):
])
# Integrity double-check
assert len(self.sample_info) == self.total_samples, "Error in total samples, expect {} samples, found {} samples.".format(self.total_samples, len(self.sample_info))
# Other parameters
self.cam_intrinsics = [None, np.load(os.path.join(self.data_dir, 'camera_intrinsics', 'camIntrinsics-D435.npy')), np.load(os.path.join(self.data_dir, 'camera_intrinsics', 'camIntrinsics-L515.npy'))]
self.use_aug = kwargs.get('use_augmentation', True)
self.rgb_aug_prob = kwargs.get('rgb_augmentation_probability', 0.8)
self.image_size = kwargs.get('image_size', (1280, 720))
self.depth_min = kwargs.get('depth_min', 0.0)
self.depth_max = kwargs.get('depth_max', 10.0)
self.depth_min = kwargs.get('depth_min', 0.3)
self.depth_max = kwargs.get('depth_max', 1.5)
self.depth_norm = kwargs.get('depth_norm', 1.0)

def __getitem__(self, id):
img_path, camera_type, scene_type = self.sample_info[id]
rgb = np.array(Image.open(os.path.join(img_path, 'rgb{}.png'.format(camera_type))), dtype = np.float32)
depth = np.array(Image.open(os.path.join(img_path, 'depth{}.png'.format(camera_type))), dtype = np.float32)
depth_gt = np.array(Image.open(os.path.join(img_path, 'depth{}-gt.png'.format(camera_type))), dtype = np.float32)
depth_gt_mask = np.array(Image.open(os.path.join(img_path, 'depth{}-gt-mask.png'.format(camera_type))), dtype = np.uint8)
return process_data(rgb, depth, depth_gt, depth_gt_mask, scene_type, camera_type, split = self.split, image_size = self.image_size, depth_min = self.depth_min, depth_max = self.depth_max, use_aug = self.use_aug, rgb_aug_prob = self.rgb_aug_prob, retain_original = self.high_resolution)
return process_data(rgb, depth, depth_gt, depth_gt_mask, self.cam_intrinsics[camera_type], scene_type, camera_type, split = self.split, image_size = self.image_size, depth_min = self.depth_min, depth_max = self.depth_max, depth_norm = self.depth_norm, use_aug = self.use_aug, rgb_aug_prob = self.rgb_aug_prob)

def __len__(self):
return self.total_samples
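
With these changes, construction takes the dataset config keys verbatim and `__getitem__` defers all preprocessing to `process_data`. A hedged usage sketch; the class name `TransCG` and the exact constructor are assumptions based on the config and code above.

```python
# Hypothetical usage; the dataset class name is an assumption.
from datasets.transcg import TransCG

dataset = TransCG(
    data_dir='data', split='train',
    image_size=(320, 240), use_augmentation=True,
    rgb_augmentation_probability=0.8,
    depth_min=0.3, depth_max=1.5, depth_norm=1.0,
)
sample = dataset[0]
# cam_intrinsics is padded with None at index 0 so that camera ids
# 1 (D435) and 2 (L515) index the list directly via camera_type.
```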
12 changes: 8 additions & 4 deletions inference.py
@@ -66,6 +66,7 @@ def __init__(self, cfg_path = os.path.join('configs', 'inference.yaml'), with_in

self.image_size = self.builder.get_inference_image_size()
self.depth_min, self.depth_max = self.builder.get_inference_depth_min_max()
self.depth_norm = self.builder.get_inference_depth_norm()

def inference(self, rgb, depth, target_size = (1280, 720)):
"""
@@ -86,7 +87,10 @@ def inference(self, rgb, depth, target_size = (1280, 720)):

rgb = cv2.resize(rgb, self.image_size, interpolation = cv2.INTER_NEAREST)
depth = cv2.resize(depth, self.image_size, interpolation = cv2.INTER_NEAREST)
depth = (depth - self.depth_min) / (self.depth_max - self.depth_min)
depth = np.where(depth < self.depth_min, 0, depth)
depth = np.where(depth > self.depth_max, 0, depth)
depth[np.isnan(depth)] = 0
depth = depth / self.depth_norm
rgb = (rgb / 255.0).transpose(2, 0, 1)
rgb = torch.FloatTensor(rgb).to(self.device).unsqueeze(0)
depth = torch.FloatTensor(depth).to(self.device).unsqueeze(0)
@@ -97,7 +101,7 @@
if self.with_info:
self.logger.info("Inference finished, time: {:.4f}s.".format(time_end - time_start))
depth_res = depth_res.squeeze(0).cpu().detach().numpy()
depth_res = depth_res * (self.depth_max - self.depth_min) + self.depth_min
depth_res = cv2.resize(depth_res, target_size, interpolation = cv2.INTER_NEAREST)
depth_res = depth_res * self.depth_norm
depth_res = cv2.resize(depth_res, target_size, interpolation = cv2.INTER_LANCZOS4)
return depth_res
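
The preprocessing change above replaces min-max normalization with zero-out-and-scale: readings outside `[depth_min, depth_max]` and NaNs become 0 (an explicit "missing" value the network can learn to fill in) instead of being rescaled to plausible in-range values, and the remaining depths are divided by `depth_norm`. A standalone sketch contrasting the two behaviors; the function names are mine.

```python
import numpy as np

def normalize_minmax(depth, depth_min=0.3, depth_max=1.5):
    # Old behavior: affine rescale; invalid readings stay in range and look valid.
    return (depth - depth_min) / (depth_max - depth_min)

def normalize_zero_invalid(depth, depth_min=0.3, depth_max=1.5, depth_norm=1.0):
    # New behavior: out-of-range and NaN readings become an unambiguous 0.
    depth = np.where((depth < depth_min) | (depth > depth_max), 0, depth)
    depth = np.nan_to_num(depth, nan=0.0)
    return depth / depth_norm

d = np.array([0.0, 0.5, np.nan, 2.0], dtype=np.float32)
print(normalize_zero_invalid(d))  # [0.  0.5 0.  0. ]
```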


1 change: 1 addition & 0 deletions sample_inference.py
@@ -55,6 +55,7 @@ def draw_point_cloud(color, depth, camera_intrinsics, use_mask = False, use_inpa
cam_intrinsics = np.load('data/camera_intrinsics/camIntrinsics-D435.npy')

res = np.clip(res, 0.1, 1.5)
depth = np.clip(depth, 0.1, 1.5)

cloud = draw_point_cloud(rgb, res, cam_intrinsics, scale = 1.0)
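
`draw_point_cloud` back-projects the RGB-D pair through the 3x3 camera intrinsics; the clips above bound both predicted and raw depth to [0.1, 1.5] m so outliers do not stretch the cloud. The geometry is the standard pinhole model; a minimal sketch (not the repository's implementation, visualization omitted).

```python
import numpy as np

def backproject(depth, K):
    """Pinhole back-projection: pixel (u, v) at depth z -> (x, y, z) in camera frame."""
    fx, fy = K[0, 0], K[1, 1]   # focal lengths in pixels
    cx, cy = K[0, 2], K[1, 2]   # principal point
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    return np.stack([x, y, depth], axis=-1).reshape(-1, 3)  # (H*W, 3) points
```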

25 changes: 13 additions & 12 deletions test.py
@@ -14,6 +14,7 @@
from tqdm import tqdm
from utils.logger import ColoredLogger
from utils.builder import ConfigBuilder
from utils.functions import to_device
from time import perf_counter


@@ -68,22 +69,22 @@ def test():
running_time = []
losses = []
with tqdm(test_dataloader) as pbar:
for data in pbar:
rgb, depth, depth_gt, depth_gt_mask, scene_mask = data
rgb = rgb.to(device)
depth = depth.to(device)
depth_gt = depth_gt.to(device)
depth_gt_mask = depth_gt_mask.to(device)
scene_mask = scene_mask.to(device)
for data_dict in pbar:
data_dict = to_device(data_dict, device)
with torch.no_grad():
time_start = perf_counter()
res = model(rgb, depth)
res = model(data_dict['rgb'], data_dict['depth'])
time_end = perf_counter()
loss = criterion(res, depth_gt, depth_gt_mask, scene_mask)
_ = metrics.evaluate_batch(res, depth_gt, depth_gt_mask, scene_mask, record = True)
data_dict['pred'] = res
loss_dict = criterion(data_dict)
loss = loss_dict['loss']
_ = metrics.evaluate_batch(data_dict, record = True)
duration = time_end - time_start
pbar.set_description('Loss: {:.8f}, model time: {:.4f}s'.format(loss.mean().item(), duration))
losses.append(loss.mean().item())
if 'smooth' in loss_dict.keys():
pbar.set_description('Loss: {:.8f}, smooth loss: {:.8f}'.format(loss.item(), loss_dict['smooth'].item()))
else:
pbar.set_description('Loss: {:.8f}'.format(loss.item()))
losses.append(loss.item())
running_time.append(duration)
mean_loss = np.stack(losses).mean()
avg_running_time = np.stack(running_time).mean()
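
`to_device` collapses the previous per-tensor `.to(device)` calls into one helper that moves the whole batch dict. A plausible implementation, assuming batches are dicts of tensors; the actual `utils.functions.to_device` may differ.

```python
import torch

def to_device(data_dict, device):
    """Move every tensor value of a (possibly nested) dict onto `device`."""
    out = {}
    for key, value in data_dict.items():
        if isinstance(value, torch.Tensor):
            out[key] = value.to(device)
        elif isinstance(value, dict):
            out[key] = to_device(value, device)
        else:
            out[key] = value  # leave non-tensor metadata untouched
    return out
```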
46 changes: 23 additions & 23 deletions train.py
@@ -15,7 +15,7 @@
from utils.logger import ColoredLogger
from utils.builder import ConfigBuilder
from utils.constants import LOSS_INF
from utils.functions import display_results
from utils.functions import display_results, to_device
from time import perf_counter


@@ -87,19 +87,19 @@ def train_one_epoch(epoch):
model.train()
losses = []
with tqdm(train_dataloader) as pbar:
for data in pbar:
for data_dict in pbar:
optimizer.zero_grad()
rgb, depth, depth_gt, depth_gt_mask, scene_mask = data
rgb = rgb.to(device)
depth = depth.to(device)
depth_gt = depth_gt.to(device)
depth_gt_mask = depth_gt_mask.to(device)
scene_mask = scene_mask.to(device)
res = model(rgb, depth)
loss = criterion(res, depth_gt, depth_gt_mask, scene_mask)
data_dict = to_device(data_dict, device)
res = model(data_dict['rgb'], data_dict['depth'])
data_dict['pred'] = res
loss_dict = criterion(data_dict)
loss = loss_dict['loss']
loss.backward()
optimizer.step()
pbar.set_description('Epoch {}, loss: {:.8f}'.format(epoch + 1, loss.mean().item()))
if 'smooth' in loss_dict.keys():
pbar.set_description('Epoch {}, loss: {:.8f}, smooth loss: {:.8f}'.format(epoch + 1, loss.item(), loss_dict['smooth'].item()))
else:
pbar.set_description('Epoch {}, loss: {:.8f}'.format(epoch + 1, loss.item()))
losses.append(loss.mean().item())
mean_loss = np.stack(losses).mean()
logger.info('Finish training process in epoch {}, mean training loss: {:.8f}'.format(epoch + 1, mean_loss))
@@ -112,22 +112,22 @@ def test_one_epoch(epoch):
running_time = []
losses = []
with tqdm(test_dataloader) as pbar:
for data in pbar:
rgb, depth, depth_gt, depth_gt_mask, scene_mask = data
rgb = rgb.to(device)
depth = depth.to(device)
depth_gt = depth_gt.to(device)
depth_gt_mask = depth_gt_mask.to(device)
scene_mask = scene_mask.to(device)
for data_dict in pbar:
data_dict = to_device(data_dict, device)
with torch.no_grad():
time_start = perf_counter()
res = model(rgb, depth)
res = model(data_dict['rgb'], data_dict['depth'])
time_end = perf_counter()
loss = criterion(res, depth_gt, depth_gt_mask, scene_mask)
_ = metrics.evaluate_batch(res, depth_gt, depth_gt_mask, scene_mask, record = True)
data_dict['pred'] = res
loss_dict = criterion(data_dict)
loss = loss_dict['loss']
_ = metrics.evaluate_batch(data_dict, record = True)
duration = time_end - time_start
pbar.set_description('Epoch {}, loss: {:.8f}, model time: {:.4f}s'.format(epoch + 1, loss.mean().item(), duration))
losses.append(loss.mean().item())
if 'smooth' in loss_dict.keys():
pbar.set_description('Epoch {}, loss: {:.8f}, smooth loss: {:.8f}'.format(epoch + 1, loss.item(), loss_dict['smooth'].item()))
else:
pbar.set_description('Epoch {}, loss: {:.8f}'.format(epoch + 1, loss.item()))
losses.append(loss.item())
running_time.append(duration)
mean_loss = np.stack(losses).mean()
avg_running_time = np.stack(running_time).mean()
28 changes: 23 additions & 5 deletions utils/builder.py
@@ -392,9 +392,8 @@ def get_metrics(self, metrics_params = None):
if metrics_params is None:
metrics_params = self.metrics_params
metrics_list = metrics_params.get('types', ['MSE', 'MaskedMSE', 'RMSE', 'MaskedRMSE', 'REL', 'MaskedREL', 'MAE', 'MaskedMAE', 'Threshold@1.05', 'MaskedThreshold@1.05', 'Threshold@1.10', 'MaskedThreshold@1.10', 'Threshold@1.25', 'MaskedThreshold@1.25'])
metrics_epsilon = metrics_params.get('epsilon', 1e-8)
from utils.metrics import MetricsRecorder
metrics = MetricsRecorder(metrics_list = metrics_list, epsilon = metrics_epsilon)
metrics = MetricsRecorder(metrics_list = metrics_list, **metrics_params)
return metrics
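
Forwarding `**metrics_params` hands `MetricsRecorder` every key of the metrics config (`epsilon`, the new `depth_scale`, and `types` alongside the explicit `metrics_list`), so its constructor must tolerate extra keyword arguments. A sketch of that calling convention; the real signature lives in `utils/metrics.py` and may differ.

```python
# Sketch only; not the repository's actual class.
class MetricsRecorderSketch:
    def __init__(self, metrics_list, epsilon=1e-8, depth_scale=1.0, **kwargs):
        # Extra config keys such as 'types' are absorbed by **kwargs.
        self.metrics_list = metrics_list
        self.epsilon = epsilon
        self.depth_scale = depth_scale
```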

def get_inference_image_size(self, inference_params = None):
@@ -463,10 +462,29 @@ def get_inference_depth_min_max(self, inference_params = None):
Returns
-------
Tuple of (int, int) the min and max depth.
Tuple of (float, float) the min and max depth.
"""
if inference_params is None:
inference_params = self.inference_params
depth_min = inference_params.get('depth_min', 0.1)
depth_min = inference_params.get('depth_min', 0.3)
depth_max = inference_params.get('depth_max', 1.5)
return depth_min, depth_max

def get_inference_depth_norm(self, inference_params = None):
"""
Get the depth normalization coefficient from inference configuration.
Parameters
----------
inference_params: dict, optional, default: None. If inference_params is provided, the parameters specified in inference_params are used to get the depth normalization coefficient. Otherwise, the inference parameters in self.params are used.
Returns
-------
float, the depth normalization coefficient.
"""
if inference_params is None:
inference_params = self.inference_params
depth_norm = inference_params.get('depth_norm', 1.0)
return depth_norm
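
The new getter mirrors `get_inference_depth_min_max` and falls back to 1.0 when the config omits `depth_norm`. How the inference path consumes these getters, per the `inference.py` `__init__` diff above; constructing the builder from the parsed YAML as `ConfigBuilder(**cfg)` is an assumption.

```python
# Assumed construction; `cfg` is the dict parsed from configs/inference.yaml.
builder = ConfigBuilder(**cfg)
image_size = builder.get_inference_image_size()               # e.g. (320, 240)
depth_min, depth_max = builder.get_inference_depth_min_max()  # e.g. (0.3, 1.5)
depth_norm = builder.get_inference_depth_norm()               # 1.0 if absent
```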