-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData.py
70 lines (67 loc) · 3.04 KB
/
Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from collections import defaultdict
class Data:
    """Dataset parsed from a comma-separated text file with a three-line preamble.

    As read by ``returnData``:

    * line 0 holds the comma-separated attribute names;
    * line 1 holds one integer per attribute: ``-1`` marks the column as
      omitted, ``0`` tags it ``'numerical'``, any other value tags it
      ``'categorical'``;
    * line 2 holds the name of the class variable;
    * every following line is one data row.

    Attributes:
        row_omitted: Column indices dropped from the header and every row.
        attributes: Attribute names that were kept.
        different: Line-1 integers for the kept attributes.
        class_variable: Text of line 2.
        data: Kept rows, each a list of strings.
        attribute_map: Attribute name -> distinct values in first-seen order.
        categorical_numerical: Attribute name -> ``'numerical'`` or
            ``'categorical'``.
        index_class_variable: Position of ``class_variable`` in ``attributes``.
    """

    def __init__(self, file, row_omited: list):
        """Load ``file`` and populate the dataset attributes.

        Args:
            file: Path of the file to read.
            row_omited: Column indices to drop, in addition to the columns
                marked ``-1`` on line 1 of the file.

        Raises:
            ValueError: If the class variable is not among the kept attributes
                or line 1 contains a non-integer field.
        """
        # Work on a copy so the indices discovered while parsing are not
        # appended to the list object owned by the caller.
        self.row_omitted = list(row_omited)
        (
            self.attributes,
            self.different,
            self.class_variable,
            self.data,
            self.attribute_map,
            self.categorical_numerical,
        ) = self.returnData(file)
        self.index_class_variable = self.attributes.index(self.class_variable)

    def _keep_columns(self, values):
        """Return ``values`` without the positions listed in ``self.row_omitted``."""
        omitted = set(self.row_omitted)
        return [value for index, value in enumerate(values) if index not in omitted]

    def returnData(self, file):
        """Parse ``file`` and return its contents.

        Columns marked ``-1`` on line 1 are added to ``self.row_omitted``.

        Args:
            file: Path of the file to read.

        Returns:
            A tuple ``(header, different, class_variable, data, attribute_map,
            categorical_numerical)``; see the class docstring for each item.
        """
        header = []
        data = []
        different = []
        class_variable = ''
        attribute_map = defaultdict(list)
        # Per-attribute set of values already recorded, so the distinct-value
        # test does not rescan the whole list for every cell.
        seen_values = defaultdict(set)
        categorical_numerical = {}

        # The context manager closes the file even if parsing raises.
        with open(file, 'r') as file_reader:
            for current_line, line in enumerate(file_reader):
                line = line.strip('\n')
                if current_line == 0:
                    header = line.split(',')
                    # Drops the first empty field, if any (presumably produced
                    # by a trailing comma -- TODO confirm against the data files).
                    if '' in header:
                        header.remove('')
                elif current_line == 1:
                    different = [int(field) for field in line.split(',')]
                    for index, value in enumerate(different):
                        if value == -1 and index not in self.row_omitted:
                            self.row_omitted.append(index)
                    # Filter both lists by index so they stay aligned even when
                    # the caller omitted a column whose marker is not -1.
                    different = self._keep_columns(different)
                    header = self._keep_columns(header)
                    for index, attribute in enumerate(header):
                        if different[index] == 0:
                            categorical_numerical[attribute] = 'numerical'
                        else:
                            categorical_numerical[attribute] = 'categorical'
                elif current_line == 2:
                    class_variable = line
                else:
                    split_data = line.split(',')
                    # Same treatment as the header; guarded so that rows without
                    # an empty field no longer raise ValueError.
                    if '' in split_data:
                        split_data.remove('')
                    split_data = self._keep_columns(split_data)
                    if not split_data or split_data == ['']:
                        continue  # blank line
                    data.append(split_data)
                    for index, value in enumerate(split_data):
                        attribute = header[index]
                        if value not in seen_values[attribute]:
                            seen_values[attribute].add(value)
                            attribute_map[attribute].append(value)
        return header, different, class_variable, data, attribute_map, categorical_numerical