Commit
Showing 85 changed files with 4,819 additions and 2 deletions.
@@ -0,0 +1,186 @@
import os
import pandas as pd
import numpy as np

import torch
import torchvision
import torchvision.transforms.functional as TF

from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms

class BaselineDataset(Dataset):

    def __init__(self, KITTIBaseDir, height=256, width=256, train=True, infoPath=None, augmentation=False,
                 augmentationProb=0.3, channels=None, groundTruth=False):
        self.baseDir = KITTIBaseDir

        # Path to the obstacles directory
        self.obstacleDir = os.path.join(self.baseDir, 'obstacles')
        # Path to the lane directory
        self.laneDir = os.path.join(self.baseDir, 'lane')
        # Path to the road directory
        self.roadDir = os.path.join(self.baseDir, 'road')
        # Path to the target directory
        self.targetDir = os.path.join(self.baseDir, 'target')
        # Path to the vehicles directory
        self.vehiclesDir = os.path.join(self.baseDir, 'vehicles')
        # targetGT holds the occupancy grid of the target vehicle id computed from ground truth
        self.targetGTDir = os.path.join(self.baseDir, 'targetGT')
        # Same as targetGT, but without the Gaussian smoothing
        self.targetGTNonGaussianDir = os.path.join(self.baseDir, 'non-gaussian')
        # The RGB occupancy map directory
        self.rgbDir = os.path.join(self.baseDir, 'rgbGrid')

        self.height, self.width = height, width

        # train = True for the training split, False otherwise
        self.train = train

        self.transform = transforms.Compose([
            transforms.Resize(self.height),
            transforms.ToTensor()
        ])

        # Affine transformation parameters, resampled once per sequence
        self.horizontalShift = 0
        self.verticalShift = 0

        # augmentation = True enables dataset augmentation
        self.augmentation = augmentation

        # Probability of applying an augmentation
        self.augmentationProb = augmentationProb

        # True if we are using ground-truth data
        self.groundTruth = groundTruth

        # Channels to use
        self.channels = channels

        # Path to the dataset info / csv file
        self.infoPath = infoPath

        self.train_df = pd.read_csv(self.infoPath, sep=' ',
                                    names=['kittiSequence', 'vehicleId', 'startFrame',
                                           'endFrame', 'numFrames'])

        # Number of rows (sequences) in the data frame
        self.dataFrameLen = len(self.train_df)

        # Length of the dataset: each item is a pair of consecutive frames,
        # so every sequence contributes numFrames - 1 items
        self.len = int(self.train_df['numFrames'].sum()) - self.dataFrameLen

        # Add starting and ending index columns to the data frame
        self.train_df['startIndex'] = np.zeros(self.dataFrameLen)
        self.train_df['endIndex'] = np.zeros(self.dataFrameLen)

        # Array mapping dataset indices to the corresponding sequence row
        self.indexToVehicle = np.ones((self.len, 1))

        # Populate indexToVehicle and the start/end index columns.
        # Worked example: a row with startFrame=0 and endFrame=10 yields
        # seqLength=10 frame pairs; if it is the first row, startIndex=0,
        # endIndex=9, and indexToVehicle[0:10] maps those items to row 0.
        curIdx = 0
        for row in range(self.dataFrameLen):
            cur_frame = self.train_df.loc[row]
            startFrame = cur_frame['startFrame']
            endFrame = cur_frame['endFrame']
            seqLength = endFrame - startFrame

            startIdx = int(curIdx)
            endIdx = int(curIdx + seqLength - 1)

            self.train_df.loc[row, 'startIndex'] = startIdx
            self.train_df.loc[row, 'endIndex'] = endIdx
            curIdx = endIdx + 1

            self.indexToVehicle[startIdx:curIdx, 0] = row

    def __len__(self):
        return self.len

    def affineTransformParams(self):
        # Return zero shifts unless augmentation is enabled and the
        # sampled probability falls below augmentationProb
        prob = np.random.random()
        horizontal_shift = 0
        vertical_shift = 0
        if prob < self.augmentationProb and self.augmentation:
            # horizontal_shift = np.random.randint(-int(self.width * 0.2), int(self.width * 0.2))
            vertical_shift = np.random.randint(-int(self.height * 0.2), int(self.height * 0.2))

        return horizontal_shift, vertical_shift

    def __getitem__(self, idx):
        row = int(self.indexToVehicle[idx, 0])
        # Get the vehicle id
        vehicleId = self.train_df.loc[row, 'vehicleId']
        # Get the KITTI sequence number
        kittiSeqNum = self.train_df.loc[row, 'kittiSequence']
        # Get the number of KITTI frames
        numFrames = self.train_df.loc[row, 'numFrames']
        # Get the current frame and its successor
        offset = idx - self.train_df.loc[row, 'startIndex']
        frame1 = int(self.train_df.loc[row, 'startFrame'] + offset)
        frame2 = int(frame1 + 1)

        # Load the images for the current frame
        curLaneImg = Image.open(os.path.join(self.laneDir, str(kittiSeqNum).zfill(4),
                                             str(frame1).zfill(6) + '.png'))
        curRoadImg = Image.open(os.path.join(self.roadDir, str(kittiSeqNum).zfill(4),
                                             str(frame1).zfill(6) + '.png'))
        curObstacleImg = Image.open(os.path.join(self.obstacleDir, str(kittiSeqNum).zfill(4),
                                                 str(frame1).zfill(6) + '.png'))
        curTargetImg = Image.open(os.path.join(self.targetGTDir, str(kittiSeqNum).zfill(4),
                                               str(frame1).zfill(6), str(vehicleId).zfill(6) + '.png'))
        curVehiclesImg = Image.open(os.path.join(self.vehiclesDir, str(kittiSeqNum).zfill(4),
                                                 str(frame1).zfill(6), str(vehicleId).zfill(6) + '.png'))
        rgbImage = Image.open(os.path.join(self.rgbDir, str(kittiSeqNum).zfill(4),
                                           str(frame1).zfill(6) + '.png'))

        # Load the target image for the next frame (ground-truth target
        # if groundTruth is set, predicted target otherwise)
        nextTargetImg = Image.open(os.path.join(self.targetDir, str(kittiSeqNum).zfill(4),
                                                str(frame2).zfill(6), str(vehicleId).zfill(6) + '.png'))
        if self.groundTruth:
            nextTargetImg = Image.open(os.path.join(self.targetGTDir, str(kittiSeqNum).zfill(4),
                                                    str(frame2).zfill(6), str(vehicleId).zfill(6) + '.png'))

        # Apply the per-sequence affine shift
        if self.train:
            degree = 0
            # Note: torchvision >= 0.9 renames the fillcolor argument to fill
            curLaneImg = TF.affine(curLaneImg, degree, (self.horizontalShift, self.verticalShift),
                                   1, 0, fillcolor=0)
            curRoadImg = TF.affine(curRoadImg, degree, (self.horizontalShift, self.verticalShift),
                                   1, 0, fillcolor=0)
            curObstacleImg = TF.affine(curObstacleImg, degree, (self.horizontalShift, self.verticalShift),
                                       1, 0, fillcolor=0)
            curTargetImg = TF.affine(curTargetImg, degree, (self.horizontalShift, self.verticalShift),
                                     1, 0, fillcolor=0)
            curVehiclesImg = TF.affine(curVehiclesImg, degree, (self.horizontalShift, self.verticalShift),
                                       1, 0, fillcolor=0)
            nextTargetImg = TF.affine(nextTargetImg, degree, (self.horizontalShift, self.verticalShift),
                                      1, 0, fillcolor=0)

        # Apply the simple torchvision transforms (resize + ToTensor)
        curLaneTensor = self.transform(curLaneImg)
        curRoadTensor = self.transform(curRoadImg)
        curObstacleTensor = self.transform(curObstacleImg)
        curVehiclesTensor = self.transform(curVehiclesImg)
        curTargetTensor = self.transform(curTargetImg)
        nextTargetTensor = self.transform(nextTargetImg)
        rgbTensor = self.transform(rgbImage)

        # Concatenate the channels: current target grid, RGB grid, next target grid
        inpTensor = curTargetTensor
        inpTensor = torch.cat((inpTensor, rgbTensor), dim=0)
        inpTensor = torch.cat((inpTensor, nextTargetTensor), dim=0)

        # Resample the affine shift at the end of every sequence
        endOfSequence = False
        if frame2 == self.train_df.loc[row, 'endFrame']:
            self.horizontalShift, self.verticalShift = self.affineTransformParams()
            endOfSequence = True

        # augmentation reports whether a non-zero shift was applied to this item
        augmentation = True
        if self.horizontalShift == 0 and self.verticalShift == 0:
            augmentation = False

        return inpTensor, kittiSeqNum, vehicleId, frame1, frame2, endOfSequence, offset, numFrames, augmentation
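
A minimal usage sketch for context. The directory layout and the space-separated, five-column info file are assumptions inferred from the paths and the pd.read_csv call above; the concrete paths below are hypothetical:

from torch.utils.data import DataLoader

# Hypothetical locations of the preprocessed KITTI grids and the info file
dataset = BaselineDataset('/data/kitti-grids', height=256, width=256, train=True,
                          infoPath='/data/kitti-grids/train_info.txt',
                          augmentation=True, augmentationProb=0.3)

# batch_size=1 and shuffle=False keep the frames of each sequence in order,
# which the per-sequence shift bookkeeping in __getitem__ relies on
loader = DataLoader(dataset, batch_size=1, shuffle=False)
inpTensor, seq, vid, f1, f2, endOfSequence, offset, numFrames, aug = next(iter(loader))
print(inpTensor.shape)  # (1, C, H, W): target, RGB grid, and next-target channels stacked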
@@ -0,0 +1,78 @@
""" | ||
Implements a Convolutional LSTM | ||
Inspired from the elegant implementation available here. | ||
https://github.com/ndrplz/ConvLSTM_pytorch/blob/master/convlstm.py | ||
""" | ||
|
||
import torch | ||
from torch.autograd import Variable | ||
import torch.nn as nn | ||
|
||
|
||
class ConvLSTMCell(nn.Module):

    def __init__(self, input_shape, c_in, hidden_size, kernel_size):
        """
        Initialize a ConvLSTMCell object
        input_shape: (width, height)
        c_in: number of channels in the input
        hidden_size: number of channels in the hidden state
        kernel_size: conv kernel dimensions (F1, F2)
        """
        super(ConvLSTMCell, self).__init__()

        self.width, self.height = input_shape
        self.c_in = c_in
        self.hidden_size = hidden_size
        self.kernel_size = kernel_size
        # 'Same' padding, so the spatial dimensions are preserved
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.batch_size = 1

        # A single convolution over [x, h] computes all four gates at once
        self.conv = nn.Conv2d(in_channels=self.c_in + self.hidden_size,
                              out_channels=4 * self.hidden_size,
                              kernel_size=self.kernel_size,
                              padding=self.padding, bias=True)

        self.h_cur, self.c_cur = self.init_hidden(self.batch_size)

    def forward(self, x_cur, s_cur=None):
        """
        Performs one step of the ConvLSTM
        x_cur: input at the current step
        s_cur: state carried over from the previous step,
               s_cur = (h_cur, c_cur) (h_cur -> output, c_cur -> cell state)
        """
        if s_cur is not None:
            self.h_cur, self.c_cur = s_cur
        else:
            # No state passed in: start from a zero state
            self.h_cur, self.c_cur = self.init_hidden(self.batch_size)

        # Stack the input and the hidden state along the channel dimension
        combined = torch.cat([x_cur, self.h_cur], dim=1)

        # One convolution produces the pre-activations of all four gates
        combined_conv_ = self.conv(combined)

        # Split into input, forget, output, and activation gates, then apply the non-linearities
        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv_, self.hidden_size, dim=1)
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)

        # Update the state: c_next = f * c + i * g, h_next = o * tanh(c_next)
        c_next = f * self.c_cur + i * g
        h_next = o * torch.tanh(c_next)

        self.h_cur = h_next
        self.c_cur = c_next

        return self.h_cur, self.c_cur

    def init_hidden(self, batch_size):
        # Zero-initialized hidden and cell states, allocated on the GPU (CUDA assumed)
        return (torch.zeros(batch_size, self.hidden_size, self.height, self.width).cuda(),
                torch.zeros(batch_size, self.hidden_size, self.height, self.width).cuda())
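
A short usage sketch. The shapes follow the constructor above; a CUDA device is assumed because init_hidden allocates the state on the GPU, and the grid size, channel counts, and kernel size below are arbitrary example values:

import torch

# 32x32 grid, 5 input channels, 16 hidden channels, 3x3 kernel
cell = ConvLSTMCell(input_shape=(32, 32), c_in=5, hidden_size=16, kernel_size=(3, 3)).cuda()

x = torch.zeros(1, 5, 32, 32).cuda()  # (batch, c_in, height, width)
state = None
for t in range(4):
    # Each step returns the updated hidden and cell states,
    # which are fed back in as s_cur for the next step
    h, c = cell(x, state)
    state = (h, c)

print(h.shape, c.shape)  # both torch.Size([1, 16, 32, 32])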