-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdataexploration.py
122 lines (97 loc) · 3.97 KB
/
dataexploration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# import modules
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import os
import Cooking
import random
# import directories, replace these with your own
RAW_DATA_DIR = "C:\\Derek\\TKS\\AirSim\\data_raw"
COOKED_DATA_DIR = "C:\\Derek\\TKS\\AirSim\\data_cooked"
DATA_FOLDERS = ['normal_1', 'normal_2', 'normal_3', 'normal_4', 'normal_5', 'normal_6', 'swerve_1', 'swerve_2', 'swerve_3']
FIGURE_SIZE = (10,10)
sample_tsv_path = os.path.join(RAW_DATA_DIR, "normal_1/airsim_rec.txt")
sample_tsv = pd.read_csv(sample_tsv_path, sep='\t')
sample_tsv.head()
sample_image_path = os.path.join(RAW_DATA_DIR, "normal_1/images/img_0.png")
sample_image = Image.open(sample_image_path)
sample_image_roi = sample_image.copy()
fillcolor = (255, 0, 0)
draw = ImageDraw.Draw(sample_image_roi)
points = [(1, 76), (1, 135), (255, 135), (255, 76)]
for i in range(0, len(points), 1):
draw.line([points[i], points[(i+1)%len(points)]], fill=fillcolor, width=2)
del draw
# show image with Region of Interest
plt.title('Image with ROI')
plt.imshow(sample_image_roi)
plt.show()
full_path_raw_folders = [os.path.join(RAW_DATA_DIR, f) for f in DATA_FOLDERS]
dataframes = []
for folder in full_path_raw_folders:
current_dataframe = pd.read_csv(os.path.join(folder, 'airsim_rec.txt'), sep='\t')
current_dataframe['Folder'] = folder
dataframes.append(current_dataframe)
dataset = pd.concat(dataframes, axis=0)
# shows steering data of images
"""
print('Number of data points: {0}'.format(dataset.shape[0]))
dataset.head()
"""
min_index = 100
max_index = 1100
steering_angles_normal_1 = dataset[dataset['Folder'].apply(lambda v: 'normal_1' in v)]['Steering'][min_index:max_index]
steering_angles_swerve_1 = dataset[dataset['Folder'].apply(lambda v: 'swerve_1' in v)]['Steering'][min_index:max_index]
plot_index = [i for i in range(min_index, max_index, 1)]
# plots steering angle in accordance to time
"""
fig = plt.figure(figsize=FIGURE_SIZE)
ax1 = fig.add_subplot(111)
ax1.scatter(plot_index, steering_angles_normal_1, c='b', marker='o', label='normal_1')
ax1.scatter(plot_index, steering_angles_swerve_1, c='r', marker='o', label='swerve_1')
plt.legend(loc='upper left');
plt.title('Steering Angles for normal_1 and swerve_1 runs')
plt.xlabel('Time')
plt.ylabel('Steering Angle')
plt.show()
"""
dataset['Is Swerve'] = dataset.apply(lambda r: 'swerve' in r['Folder'], axis=1)
grouped = dataset.groupby(by=['Is Swerve']).size().reset_index()
grouped.columns = ['Is Swerve', 'Count']
def make_autopct(values):
def my_autopct(percent):
total = sum(values)
val = int(round(percent*total/100.0))
return '{0:.2f}% ({1:d})'.format(percent,val)
return my_autopct
# graphs percentage of data points per strategy
"""
pie_labels = ['Normal', 'Swerve']
fig, ax = plt.subplots(figsize=FIGURE_SIZE)
ax.pie(grouped['Count'], labels=pie_labels, autopct = make_autopct(grouped['Count']))
plt.title('Number of data points per driving strategy')
plt.show()
"""
bins = np.arange(-1, 1.05, 0.05)
normal_labels = dataset[dataset['Is Swerve'] == False]['Steering']
swerve_labels = dataset[dataset['Is Swerve'] == True]['Steering']
def steering_histogram(hist_labels, title, color):
plt.figure(figsize=FIGURE_SIZE)
n, b, p = plt.hist(hist_labels.as_matrix(), bins, normed=1, facecolor=color)
plt.xlabel('Steering Angle')
plt.ylabel('Normalized Frequency')
plt.title(title)
plt.show()
# shows frequency of driving angles per strategy
"""
steering_histogram(normal_labels, 'Normal label distribution', 'g')
steering_histogram(swerve_labels, 'Swerve label distribution', 'r')
"""
# cooks the data and exports them to another folder
"""
train_eval_test_split = [0.7, 0.2, 0.1]
full_path_raw_folders = [os.path.join(RAW_DATA_DIR, f) for f in DATA_FOLDERS]
Cooking.cook(full_path_raw_folders, COOKED_DATA_DIR, train_eval_test_split)
"""