update smooth loss
Galaxies99 committed Sep 10, 2021
1 parent d5f9424 commit 4df4487
Showing 15 changed files with 545 additions and 204 deletions.
2 changes: 2 additions & 0 deletions configs/320x240/train_tg_val_tg.yaml
@@ -27,13 +27,15 @@
"rgb_augmentation_probability": 0.8
"depth_min": 0.0
"depth_max": 10.0
"depth_norm": 10.0
"test":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": False
"depth_min": 0.0
"depth_max": 10.0
"depth_norm": 10.0

"dataloader":
"num_workers": 48
61 changes: 61 additions & 0 deletions configs/320x240/train_tg_val_tg_add.yaml
@@ -0,0 +1,61 @@
# script id: 1
"model":
"type": "DFNet"
"params":
"in_channels": 4
"hidden_channels": 64
"L": 5
"k": 12

"optimizer":
"type": "AdamW"
"params":
"lr": 0.001

"lr_scheduler":
"type": "MultiStepLR"
"params":
"milestones": [5, 15, 25, 35]
"gamma": 0.2

"dataset":
"train":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": True
"rgb_augmentation_probability": 0.8
"depth_min": 0.0
"depth_max": 10.0
"depth_norm": 10.0
"test":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": False
"depth_min": 0.0
"depth_max": 10.0
"depth_norm": 10.0

"dataloader":
"num_workers": 48
"shuffle": True
"drop_last": True

"trainer":
"batch_size": 32
"test_batch_size": 1
"multigpu": True
"max_epoch": 40
"criterion":
"type": "custom_masked_mse_loss"
"epsilon": 0.00000001

"metrics":
"types": ["MSE", "MaskedMSE", "RMSE", "MaskedRMSE", "REL", "MaskedREL", "MAE", "MaskedMAE", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]"]
"epsilon": 0.00000001
"depth_scale": 10.0

"stats":
"stats_dir": "stats"
"stats_exper": "train-tg-val-tg-add"
63 changes: 63 additions & 0 deletions configs/320x240/train_tg_val_tg_combine.yaml
@@ -0,0 +1,63 @@
# script id: 1
"model":
"type": "DFNet"
"params":
"in_channels": 4
"hidden_channels": 64
"L": 5
"k": 12

"optimizer":
"type": "AdamW"
"params":
"lr": 0.001

"lr_scheduler":
"type": "MultiStepLR"
"params":
"milestones": [5, 15, 25, 35]
"gamma": 0.2

"dataset":
"train":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": True
"rgb_augmentation_probability": 0.8
"depth_min": 0.3
"depth_max": 1.5
"depth_norm": 1.0
"test":
"type": "transcg"
"data_dir": "data"
"image_size": !!python/tuple [320, 240]
"use_augmentation": False
"depth_min": 0.3
"depth_max": 1.5
"depth_norm": 1.0

"dataloader":
"num_workers": 48
"shuffle": True
"drop_last": True

"trainer":
"batch_size": 32
"test_batch_size": 1
"multigpu": True
"max_epoch": 40
"criterion":
"type": "custom_masked_mse_loss"
"epsilon": 0.00000001
"combined_smooth": True
"combined_beta": 0.005

"metrics":
"types": ["MSE", "MaskedMSE", "RMSE", "MaskedRMSE", "REL", "MaskedREL", "MAE", "MaskedMAE", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]"]
"epsilon": 0.00000001
"depth_scale": 1.0

"stats":
"stats_dir": "stats"
"stats_exper": "train-tg-val-tg-comb"
2 changes: 1 addition & 1 deletion configs/inference.yaml
@@ -7,7 +7,7 @@
"k": 12

"inference":
"checkpoint_path": "stats/checkpoint.tar"
"checkpoint_path": "stats/train-tg-val-tg-comb/checkpoint.tar"
"image_size": !!python/tuple [320, 240]
"cuda_id": 0
"depth_min": 0.0
12 changes: 6 additions & 6 deletions datasets/transcg.py
@@ -33,9 +33,6 @@ def __init__(self, data_dir, split = 'train', **kwargs):
raise AttributeError('Invalid split option.')
self.data_dir = data_dir
self.split = split
self.high_resolution = kwargs.get('high_resolution', False)
if self.high_resolution and split == 'train':
raise AttributeError('Does not support returning high resolution images during training. If you want to train on high resolution samples, please set image_size arguments in high resolution.')
with open(os.path.join(self.data_dir, 'metadata.json'), 'r') as fp:
self.dataset_metadata = json.load(fp)
self.scene_num = self.dataset_metadata['total_scenes']
@@ -64,19 +61,22 @@ def __init__(self, data_dir, split = 'train', **kwargs):
])
# Integrity double-check
assert len(self.sample_info) == self.total_samples, "Error in total samples, expect {} samples, found {} samples.".format(self.total_samples, len(self.sample_info))
# Other parameters
self.cam_intrinsics = [None, np.load(os.path.join(self.data_dir, 'camera_intrinsics', 'camIntrinsics-D435.npy')), np.load(os.path.join(self.data_dir, 'camera_intrinsics', 'camIntrinsics-L515.npy'))]
self.use_aug = kwargs.get('use_augmentation', True)
self.rgb_aug_prob = kwargs.get('rgb_augmentation_probability', 0.8)
self.image_size = kwargs.get('image_size', (1280, 720))
self.depth_min = kwargs.get('depth_min', 0.0)
self.depth_max = kwargs.get('depth_max', 10.0)
self.depth_min = kwargs.get('depth_min', 0.3)
self.depth_max = kwargs.get('depth_max', 1.5)
self.depth_norm = kwargs.get('depth_norm', 1.0)

def __getitem__(self, id):
img_path, camera_type, scene_type = self.sample_info[id]
rgb = np.array(Image.open(os.path.join(img_path, 'rgb{}.png'.format(camera_type))), dtype = np.float32)
depth = np.array(Image.open(os.path.join(img_path, 'depth{}.png'.format(camera_type))), dtype = np.float32)
depth_gt = np.array(Image.open(os.path.join(img_path, 'depth{}-gt.png'.format(camera_type))), dtype = np.float32)
depth_gt_mask = np.array(Image.open(os.path.join(img_path, 'depth{}-gt-mask.png'.format(camera_type))), dtype = np.uint8)
return process_data(rgb, depth, depth_gt, depth_gt_mask, scene_type, camera_type, split = self.split, image_size = self.image_size, depth_min = self.depth_min, depth_max = self.depth_max, use_aug = self.use_aug, rgb_aug_prob = self.rgb_aug_prob, retain_original = self.high_resolution)
return process_data(rgb, depth, depth_gt, depth_gt_mask, self.cam_intrinsics[camera_type], scene_type, camera_type, split = self.split, image_size = self.image_size, depth_min = self.depth_min, depth_max = self.depth_max, depth_norm = self.depth_norm, use_aug = self.use_aug, rgb_aug_prob = self.rgb_aug_prob)

def __len__(self):
return self.total_samples
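
With these changes, construction takes the dataset config keys verbatim and `__getitem__` defers all preprocessing to `process_data`. A hedged usage sketch; the class name `TransCG` and the exact constructor are assumptions based on the config and code above.

```python
# Hypothetical usage; the dataset class name is an assumption.
from datasets.transcg import TransCG

dataset = TransCG(
    data_dir='data', split='train',
    image_size=(320, 240), use_augmentation=True,
    rgb_augmentation_probability=0.8,
    depth_min=0.3, depth_max=1.5, depth_norm=1.0,
)
sample = dataset[0]
# cam_intrinsics is padded with None at index 0 so that camera ids
# 1 (D435) and 2 (L515) index the list directly via camera_type.
```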
12 changes: 8 additions & 4 deletions inference.py
@@ -66,6 +66,7 @@ def __init__(self, cfg_path = os.path.join('configs', 'inference.yaml'), with_in

self.image_size = self.builder.get_inference_image_size()
self.depth_min, self.depth_max = self.builder.get_inference_depth_min_max()
self.depth_norm = self.builder.get_inference_depth_norm()

def inference(self, rgb, depth, target_size = (1280, 720)):
"""
@@ -86,7 +87,10 @@ def inference(self, rgb, depth, target_size = (1280, 720)):

rgb = cv2.resize(rgb, self.image_size, interpolation = cv2.INTER_NEAREST)
depth = cv2.resize(depth, self.image_size, interpolation = cv2.INTER_NEAREST)
depth = (depth - self.depth_min) / (self.depth_max - self.depth_min)
depth = np.where(depth < self.depth_min, 0, depth)
depth = np.where(depth > self.depth_max, 0, depth)
depth[np.isnan(depth)] = 0
depth = depth / self.depth_norm
rgb = (rgb / 255.0).transpose(2, 0, 1)
rgb = torch.FloatTensor(rgb).to(self.device).unsqueeze(0)
depth = torch.FloatTensor(depth).to(self.device).unsqueeze(0)
@@ -97,7 +101,7 @@
if self.with_info:
self.logger.info("Inference finished, time: {:.4f}s.".format(time_end - time_start))
depth_res = depth_res.squeeze(0).cpu().detach().numpy()
depth_res = depth_res * (self.depth_max - self.depth_min) + self.depth_min
depth_res = cv2.resize(depth_res, target_size, interpolation = cv2.INTER_NEAREST)
depth_res = depth_res * self.depth_norm
depth_res = cv2.resize(depth_res, target_size, interpolation = cv2.INTER_LANCZOS4)
return depth_res
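
The preprocessing change above replaces min-max normalization with zero-out-and-scale: readings outside `[depth_min, depth_max]` and NaNs become 0 (an explicit "missing" value the network can learn to fill in) instead of being rescaled to plausible in-range values, and the remaining depths are divided by `depth_norm`. A standalone sketch contrasting the two behaviors; the function names are mine.

```python
import numpy as np

def normalize_minmax(depth, depth_min=0.3, depth_max=1.5):
    # Old behavior: affine rescale; invalid readings stay in range and look valid.
    return (depth - depth_min) / (depth_max - depth_min)

def normalize_zero_invalid(depth, depth_min=0.3, depth_max=1.5, depth_norm=1.0):
    # New behavior: out-of-range and NaN readings become an unambiguous 0.
    depth = np.where((depth < depth_min) | (depth > depth_max), 0, depth)
    depth = np.nan_to_num(depth, nan=0.0)
    return depth / depth_norm

d = np.array([0.0, 0.5, np.nan, 2.0], dtype=np.float32)
print(normalize_zero_invalid(d))  # [0.  0.5 0.  0. ]
```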


1 change: 1 addition & 0 deletions sample_inference.py
@@ -55,6 +55,7 @@ def draw_point_cloud(color, depth, camera_intrinsics, use_mask = False, use_inpa
cam_intrinsics = np.load('data/camera_intrinsics/camIntrinsics-D435.npy')

res = np.clip(res, 0.1, 1.5)
depth = np.clip(depth, 0.1, 1.5)

cloud = draw_point_cloud(rgb, res, cam_intrinsics, scale = 1.0)
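
`draw_point_cloud` back-projects the RGB-D pair through the 3x3 camera intrinsics; the clips above bound both predicted and raw depth to [0.1, 1.5] m so outliers do not stretch the cloud. The geometry is the standard pinhole model; a minimal sketch (not the repository's implementation, visualization omitted).

```python
import numpy as np

def backproject(depth, K):
    """Pinhole back-projection: pixel (u, v) at depth z -> (x, y, z) in camera frame."""
    fx, fy = K[0, 0], K[1, 1]   # focal lengths in pixels
    cx, cy = K[0, 2], K[1, 2]   # principal point
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    return np.stack([x, y, depth], axis=-1).reshape(-1, 3)  # (H*W, 3) points
```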

25 changes: 13 additions & 12 deletions test.py
@@ -14,6 +14,7 @@
from tqdm import tqdm
from utils.logger import ColoredLogger
from utils.builder import ConfigBuilder
from utils.functions import to_device
from time import perf_counter


@@ -68,22 +69,22 @@ def test():
running_time = []
losses = []
with tqdm(test_dataloader) as pbar:
for data in pbar:
rgb, depth, depth_gt, depth_gt_mask, scene_mask = data
rgb = rgb.to(device)
depth = depth.to(device)
depth_gt = depth_gt.to(device)
depth_gt_mask = depth_gt_mask.to(device)
scene_mask = scene_mask.to(device)
for data_dict in pbar:
data_dict = to_device(data_dict, device)
with torch.no_grad():
time_start = perf_counter()
res = model(rgb, depth)
res = model(data_dict['rgb'], data_dict['depth'])
time_end = perf_counter()
loss = criterion(res, depth_gt, depth_gt_mask, scene_mask)
_ = metrics.evaluate_batch(res, depth_gt, depth_gt_mask, scene_mask, record = True)
data_dict['pred'] = res
loss_dict = criterion(data_dict)
loss = loss_dict['loss']
_ = metrics.evaluate_batch(data_dict, record = True)
duration = time_end - time_start
pbar.set_description('Loss: {:.8f}, model time: {:.4f}s'.format(loss.mean().item(), duration))
losses.append(loss.mean().item())
if 'smooth' in loss_dict.keys():
pbar.set_description('Loss: {:.8f}, smooth loss: {:.8f}'.format(loss.item(), loss_dict['smooth'].item()))
else:
pbar.set_description('Loss: {:.8f}'.format(loss.item()))
losses.append(loss.item())
running_time.append(duration)
mean_loss = np.stack(losses).mean()
avg_running_time = np.stack(running_time).mean()
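
`to_device` collapses the previous per-tensor `.to(device)` calls into one helper that moves the whole batch dict. A plausible implementation, assuming batches are dicts of tensors; the actual `utils.functions.to_device` may differ.

```python
import torch

def to_device(data_dict, device):
    """Move every tensor value of a (possibly nested) dict onto `device`."""
    out = {}
    for key, value in data_dict.items():
        if isinstance(value, torch.Tensor):
            out[key] = value.to(device)
        elif isinstance(value, dict):
            out[key] = to_device(value, device)
        else:
            out[key] = value  # leave non-tensor metadata untouched
    return out
```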
46 changes: 23 additions & 23 deletions train.py
@@ -15,7 +15,7 @@
from utils.logger import ColoredLogger
from utils.builder import ConfigBuilder
from utils.constants import LOSS_INF
from utils.functions import display_results
from utils.functions import display_results, to_device
from time import perf_counter


@@ -87,19 +87,19 @@ def train_one_epoch(epoch):
model.train()
losses = []
with tqdm(train_dataloader) as pbar:
for data in pbar:
for data_dict in pbar:
optimizer.zero_grad()
rgb, depth, depth_gt, depth_gt_mask, scene_mask = data
rgb = rgb.to(device)
depth = depth.to(device)
depth_gt = depth_gt.to(device)
depth_gt_mask = depth_gt_mask.to(device)
scene_mask = scene_mask.to(device)
res = model(rgb, depth)
loss = criterion(res, depth_gt, depth_gt_mask, scene_mask)
data_dict = to_device(data_dict, device)
res = model(data_dict['rgb'], data_dict['depth'])
data_dict['pred'] = res
loss_dict = criterion(data_dict)
loss = loss_dict['loss']
loss.backward()
optimizer.step()
pbar.set_description('Epoch {}, loss: {:.8f}'.format(epoch + 1, loss.mean().item()))
if 'smooth' in loss_dict.keys():
pbar.set_description('Epoch {}, loss: {:.8f}, smooth loss: {:.8f}'.format(epoch + 1, loss.item(), loss_dict['smooth'].item()))
else:
pbar.set_description('Epoch {}, loss: {:.8f}'.format(epoch + 1, loss.item()))
losses.append(loss.mean().item())
mean_loss = np.stack(losses).mean()
logger.info('Finish training process in epoch {}, mean training loss: {:.8f}'.format(epoch + 1, mean_loss))
@@ -112,22 +112,22 @@ def test_one_epoch(epoch):
running_time = []
losses = []
with tqdm(test_dataloader) as pbar:
for data in pbar:
rgb, depth, depth_gt, depth_gt_mask, scene_mask = data
rgb = rgb.to(device)
depth = depth.to(device)
depth_gt = depth_gt.to(device)
depth_gt_mask = depth_gt_mask.to(device)
scene_mask = scene_mask.to(device)
for data_dict in pbar:
data_dict = to_device(data_dict, device)
with torch.no_grad():
time_start = perf_counter()
res = model(rgb, depth)
res = model(data_dict['rgb'], data_dict['depth'])
time_end = perf_counter()
loss = criterion(res, depth_gt, depth_gt_mask, scene_mask)
_ = metrics.evaluate_batch(res, depth_gt, depth_gt_mask, scene_mask, record = True)
data_dict['pred'] = res
loss_dict = criterion(data_dict)
loss = loss_dict['loss']
_ = metrics.evaluate_batch(data_dict, record = True)
duration = time_end - time_start
pbar.set_description('Epoch {}, loss: {:.8f}, model time: {:.4f}s'.format(epoch + 1, loss.mean().item(), duration))
losses.append(loss.mean().item())
if 'smooth' in loss_dict.keys():
pbar.set_description('Epoch {}, loss: {:.8f}, smooth loss: {:.8f}'.format(epoch + 1, loss.item(), loss_dict['smooth'].item()))
else:
pbar.set_description('Epoch {}, loss: {:.8f}'.format(epoch + 1, loss.item()))
losses.append(loss.item())
running_time.append(duration)
mean_loss = np.stack(losses).mean()
avg_running_time = np.stack(running_time).mean()
28 changes: 23 additions & 5 deletions utils/builder.py
@@ -392,9 +392,8 @@ def get_metrics(self, metrics_params = None):
if metrics_params is None:
metrics_params = self.metrics_params
metrics_list = metrics_params.get('types', ['MSE', 'MaskedMSE', 'RMSE', 'MaskedRMSE', 'REL', 'MaskedREL', 'MAE', 'MaskedMAE', 'Threshold@1.05', 'MaskedThreshold@1.05', 'Threshold@1.10', 'MaskedThreshold@1.10', 'Threshold@1.25', 'MaskedThreshold@1.25'])
metrics_epsilon = metrics_params.get('epsilon', 1e-8)
from utils.metrics import MetricsRecorder
metrics = MetricsRecorder(metrics_list = metrics_list, epsilon = metrics_epsilon)
metrics = MetricsRecorder(metrics_list = metrics_list, **metrics_params)
return metrics
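
Forwarding `**metrics_params` hands `MetricsRecorder` every key of the metrics config (`epsilon`, the new `depth_scale`, and `types` alongside the explicit `metrics_list`), so its constructor must tolerate extra keyword arguments. A sketch of that calling convention; the real signature lives in `utils/metrics.py` and may differ.

```python
# Sketch only; not the repository's actual class.
class MetricsRecorderSketch:
    def __init__(self, metrics_list, epsilon=1e-8, depth_scale=1.0, **kwargs):
        # Extra config keys such as 'types' are absorbed by **kwargs.
        self.metrics_list = metrics_list
        self.epsilon = epsilon
        self.depth_scale = depth_scale
```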

def get_inference_image_size(self, inference_params = None):
@@ -463,10 +462,29 @@ def get_inference_depth_min_max(self, inference_params = None):
Returns
-------
Tuple of (int, int) the min and max depth.
Tuple of (float, float) the min and max depth.
"""
if inference_params is None:
inference_params = self.inference_params
depth_min = inference_params.get('depth_min', 0.1)
depth_min = inference_params.get('depth_min', 0.3)
depth_max = inference_params.get('depth_max', 1.5)
return depth_min, depth_max

def get_inference_depth_norm(self, inference_params = None):
"""
Get the depth normalization coefficient from inference configuration.
Parameters
----------
inference_params: dict, optional, default: None. If inference_params is provided, the parameters specified in inference_params are used to get the depth normalization coefficient. Otherwise, the inference parameters in self.params are used.
Returns
-------
float, the depth normalization coefficient.
"""
if inference_params is None:
inference_params = self.inference_params
depth_norm = inference_params.get('depth_norm', 1.0)
return depth_norm
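
The new getter mirrors `get_inference_depth_min_max` and falls back to 1.0 when the config omits `depth_norm`. How the inference path consumes these getters, per the `inference.py` `__init__` diff above; constructing the builder from the parsed YAML as `ConfigBuilder(**cfg)` is an assumption.

```python
# Assumed construction; `cfg` is the dict parsed from configs/inference.yaml.
builder = ConfigBuilder(**cfg)
image_size = builder.get_inference_image_size()               # e.g. (320, 240)
depth_min, depth_max = builder.get_inference_depth_min_max()  # e.g. (0.3, 1.5)
depth_norm = builder.get_inference_depth_norm()               # 1.0 if absent
```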