-
Notifications
You must be signed in to change notification settings - Fork 0
/
Train.py
132 lines (102 loc) · 4.11 KB
/
Train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tcn import TCN
import matplotlib.pyplot as plt
import time
# Report how many GPUs TensorFlow can see before any training starts.
_gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(_gpu_devices))

# Directory that is scanned for the input CSV files.
FOLDER = "blinks/"
# Preprocess a file
def preprocess_file(filename, folder=None):
    """Load one CSV file and split it into features and labels.

    The file is read without a header row. Columns 1 through 6 (0-based)
    become the feature matrix and column 7 becomes the label vector;
    column 0 is not used.

    Args:
        filename: Name of the CSV file, relative to ``folder``.
        folder: Directory containing the file. When omitted, the module-level
            ``FOLDER`` is used, so existing one-argument calls keep working.

    Returns:
        A tuple ``(X, y)``: ``X`` is the 2-D feature array and ``y`` is the
        1-D label array, both with one entry per row of the file.
    """
    if folder is None:
        folder = FOLDER
    # os.path.join works whether or not the folder ends with a separator.
    data = pd.read_csv(os.path.join(folder, filename), header=None)
    X = np.array(data.iloc[:, 1:7])
    y = np.array(data.iloc[:, 7])
    return X, y
# Create overlapping window samples
def create_overlapping_window_samples(X, y, window_size, step_size, out_window):
    """Cut a sequence into overlapping windows with one label per window.

    Windows of ``window_size`` consecutive rows are taken every ``step_size``
    rows. The label of a window is the mean of ``y`` over the last
    ``out_window`` rows of that window.

    Args:
        X: Array-like of per-row features; the first axis is the row index.
        y: Array-like of per-row labels, aligned with ``X``.
        window_size: Number of rows in each window.
        step_size: Number of rows between the starts of consecutive windows.
        out_window: Number of trailing rows of each window averaged into the
            label. Must satisfy ``1 <= out_window <= window_size``.

    Returns:
        A tuple ``(X_samples, y_samples)``. ``X_samples`` has shape
        ``(n_windows, window_size, ...)`` and ``y_samples`` has ``n_windows``
        entries. When the input is shorter than one window, both arrays are
        empty but keep their trailing dimensions so they can still be
        concatenated with the output for other inputs.

    Raises:
        ValueError: If ``window_size`` or ``step_size`` is not positive, or
            ``out_window`` is outside ``1..window_size``.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive")
    if not 0 < out_window <= window_size:
        raise ValueError("out_window must be between 1 and window_size")

    X = np.asarray(X)
    y = np.asarray(y)

    # The "+ 1" keeps the last full window, i.e. the one that ends exactly on
    # the final row; without it that window is silently dropped.
    starts = range(0, len(X) - window_size + 1, step_size)
    if len(starts) == 0:
        return (
            np.empty((0, window_size) + X.shape[1:], dtype=X.dtype),
            np.empty((0,) + y.shape[1:], dtype=float),
        )

    X_samples = [X[start:start + window_size] for start in starts]
    y_samples = [
        np.mean(y[start + window_size - out_window:start + window_size], axis=0)
        for start in starts
    ]
    return np.array(X_samples), np.array(y_samples)
# Windowing configuration, expressed in rows of the input files.
window_size = 600  # rows per input window
step_size = 60     # rows between the starts of consecutive windows
out_window = 20    # trailing rows of each window averaged into its label

# os.listdir() returns names in arbitrary order, so sort them: otherwise the
# row order of X_all differs between machines and the seeded split below is
# not reproducible. Sub-directories are skipped because they cannot be parsed.
filenames = sorted(
    name for name in os.listdir(FOLDER)
    if os.path.isfile(os.path.join(FOLDER, name))
)

X_all, y_all = [], []
for filename in filenames:
    X, y = preprocess_file(filename)
    X_samples, y_samples = create_overlapping_window_samples(
        X, y, window_size, step_size, out_window
    )
    # A file too short for a single window contributes nothing; skipping it
    # also keeps arrays of mismatched rank out of the concatenation below.
    if len(X_samples) == 0:
        continue
    # Append the samples to the overall dataset
    X_all.append(X_samples)
    y_all.append(y_samples)

if not X_all:
    raise ValueError(f"No training windows could be built from files in {FOLDER!r}")

# Concatenate all samples
X_all = np.concatenate(X_all, axis=0)
y_all = np.concatenate(y_all, axis=0)
print(np.mean(y_all))

# Split the dataset into training and validation sets
# NOTE(review): consecutive windows share window_size - step_size rows, so a
# random split places heavily overlapping windows on both sides and validation
# scores are likely optimistic. Consider splitting by file instead.
X_train, X_val, y_train, y_val = train_test_split(X_all, y_all, test_size=0.2, random_state=42)
def train_LSTM():
    """Build, compile and fit the stacked-LSTM model.

    Reads ``window_size``, ``X_train``, ``y_train``, ``X_val`` and ``y_val``
    from module scope.

    Returns:
        A tuple ``(model, history)``: the fitted model and the object
        returned by ``model.fit``.
    """
    layers = [
        # The first LSTM returns the whole sequence so a second LSTM can be
        # stacked on top of it.
        LSTM(32, input_shape=(window_size, 6), return_sequences=True),
        Dropout(0.5),
        LSTM(16),
        Dropout(0.2),
        # Single sigmoid output. For a regression variant use a linear output
        # with loss='mse' and metrics=['mae'] instead.
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(layers)

    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    history = model.fit(
        X_train,
        y_train,
        epochs=20,
        batch_size=8,
        validation_data=(X_val, y_val),
    )
    return model, history
def train_TCN():
    """Build, compile and fit the TCN regression model.

    Reads ``window_size``, ``X_train``, ``y_train``, ``X_val`` and ``y_val``
    from module scope.

    Returns:
        A tuple ``(model, history)``: the fitted model and the object
        returned by ``model.fit``.
    """
    tcn_layer = TCN(
        input_shape=(window_size, 6),
        nb_filters=32,
        kernel_size=3,
        nb_stacks=1,
        dilations=[4, 8, 16, 32, 64],
        activation='relu',
        padding='causal',
        use_skip_connections=True,
        return_sequences=False,
    )
    # Linear output trained with MSE; the classification alternative would be
    # a sigmoid output with binary cross-entropy and an accuracy metric.
    output_layer = Dense(1, activation='linear')
    model = Sequential([tcn_layer, Flatten(), output_layer])

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

    history = model.fit(
        X_train,
        y_train,
        epochs=20,
        batch_size=8,
        validation_data=(X_val, y_val),
    )
    return model, history
# Choose the model to train by switching which of these two lines is active.
model, history = train_LSTM()
# model, history = train_TCN()

# Make predictions on the validation data and time the call.
# perf_counter() is used because it is a monotonic, high-resolution clock
# intended for measuring intervals; time.time() can jump if the system clock
# is adjusted.
before = time.perf_counter()
y_pred = model.predict(X_val)
after = time.perf_counter()
print(f"Time taken to predict {len(X_val)} samples: {after - before} seconds")

# Print the predicted and true outcomes for each test data
for i, (predicted, actual) in enumerate(zip(y_pred, y_val), start=1):
    print(f"Test Data {i}:")
    print(f"Predicted outcome: {predicted}")
    print(f"True outcome: {actual}\n")

# Plot whichever metric the trained model recorded: train_LSTM() compiles
# with 'accuracy' and train_TCN() with 'mae', so hard-coding one key raises
# KeyError as soon as the other model is selected above.
if 'accuracy' in history.history:
    metric_key = 'accuracy'
    metric_label = 'Accuracy'
    plot_title = 'LSTM Model Accuracy'
else:
    metric_key = 'mae'
    metric_label = 'MAE'
    plot_title = 'LSTM Model Mean Absolute Error'

plt.plot(history.history[metric_key], label=f'Training {metric_label}')
plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
plt.title(plot_title)
plt.xlabel('Epoch')
plt.ylabel(metric_label)
plt.legend()
plt.show()