-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathdata.py
115 lines (94 loc) · 3.51 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
def extract_file_content_binary_float64(filename, shape, offset=0):
    """
    read big-endian 64-bit floats from a binary file and arrange them as a 2-d array
    :param filename: path of the binary file to read
    :param shape: (rows, columns) of the array to build
    :param offset: byte position in the file at which reading starts
    :return: a 2-d array of big-endian float64 values with the requested shape
    """
    big_endian_f8 = numpy.dtype('>f8')
    with open(filename, 'rb') as bytestream:
        if offset != 0:
            bytestream.seek(offset)
        # every 64-bit float occupies 8 bytes
        raw = bytestream.read(shape[0] * shape[1] * 8)
    values = numpy.frombuffer(raw, dtype=big_endian_f8)
    return values.reshape(shape)
def extract_file_content_float64(filename, shape):
    """
    extract the content of a csv file which contains 64-bit float data into a 2-d array
    :param filename: the name of the file that needs to be extracted
    :param shape: the shape of the 2-d array
    :return: a float64 array with the requested shape holding the file content in reading order
    """
    values = []
    with open(filename, 'r') as file_stream:
        for line in file_stream:
            # blank lines (e.g. a trailing newline at the end of the file) carry no data
            if not line.strip():
                continue
            values.extend(float(field) for field in line.split(','))
    # build the array once instead of re-copying it for every line of the file
    return numpy.array(values, dtype=numpy.float64).reshape(shape)
def convert2csv_float64(src_filename, dest_filename, shape):
    """
    dump a binary file of 64-bit floats as comma separated text, one data row per line
    :param src_filename: path of the binary file to read
    :param dest_filename: path of the csv file to create (overwritten if present)
    :param shape: the shape of the data set stored in the binary file
    :return:
    """
    data = extract_file_content_binary_float64(src_filename, shape)
    with open(dest_filename, 'w') as file_stream:
        for row_index in range(shape[0]):
            fields = data[row_index].astype('str')
            file_stream.write(",".join(fields) + "\n")
def write_csv_file(file_name, data):
    """
    write a data set to a csv file, one row per line with comma separated values
    :param file_name: the name of the file to be generated (overwritten if present)
    :param data: an iterable of numpy arrays, each one being a row of the file
    :return:
    """
    with open(file_name, 'w') as file_stream:
        for row in data:
            # the numpy.str alias was removed in NumPy 1.24; the builtin str
            # requests the same string conversion
            file_stream.write(','.join(row.astype(str)) + '\n')
def modify_x(train_x, num):
    """
    build windows of num consecutive rows for every row of train_x
    :param train_x: a 2-d array whose rows are ordered samples
    :param num: the number of consecutive rows placed in each window
    :return: an array of shape (rows, num, columns); window i holds rows i .. i + num - 1,
             wrapping around to the first rows once the end of train_x is reached
    """
    stacked = train_x.copy()
    for shift in range(1, num):
        # rolling upwards by `shift` puts row i + shift next to row i
        shifted = numpy.roll(train_x, -shift, axis=0)
        stacked = numpy.concatenate((stacked, shifted), axis=1)
    row_count = train_x.shape[0]
    column_count = train_x.shape[1]
    return stacked.reshape(row_count, num, column_count)
def time_series_to_supervised(data_frame, lag=1):
    """
    frame a sequence as a supervised learning problem
    :param data_frame: the sequence data, anything accepted by the DataFrame constructor
    :param lag: the number of shifted copies placed in front of the original columns
    :return: a DataFrame holding the copies shifted by 1 .. lag rows followed by the
             original columns, with the gaps opened by shifting filled with 0
    """
    current = DataFrame(data_frame)
    lagged = [current.shift(step) for step in range(1, lag + 1)]
    framed = concat(lagged + [current], axis=1)
    return framed.fillna(0)
def difference(data_set, interval=1):
    """
    create a difference series
    :param data_set: an array of ordered observations
    :param interval: the distance between the two observations that are subtracted
    :return: a 2-d array with len(data_set) - interval rows, row k being
             data_set[k + interval] - data_set[k]
    """
    deltas = [
        data_set[idx] - data_set[idx - interval]
        for idx in range(interval, len(data_set))
    ]
    row_count = data_set.shape[0] - interval
    return numpy.array(deltas).reshape(row_count, -1)
def inverse_difference(history, y_hat, interval=1):
    """
    invert difference value
    :param history: the sequence of observations the difference was taken against
    :param y_hat: the differenced value to restore
    :param interval: how many positions from the end of history the reference observation lies
    :return: y_hat plus the observation found interval positions from the end of history
    """
    reference = history[-interval]
    return y_hat + reference
def gen_scaler(min_, max_):
    """
    build a MinMaxScaler with feature range (-1, 1) fitted on a minimum and a maximum row
    :param min_: the value used to fill the first of the two fitting rows
    :param max_: the value used to fill the second of the two fitting rows
    :return: the fitted scaler
    """
    unit_row = numpy.ones((1, 2))
    # two rows, two columns: the first row carries min_, the second max_
    bounds = numpy.concatenate((unit_row * min_, unit_row * max_), axis=0)
    return MinMaxScaler(feature_range=(-1, 1)).fit(bounds)