-
Notifications
You must be signed in to change notification settings - Fork 1
/
03_train_sea_ann.py
180 lines (134 loc) · 7.18 KB
/
03_train_sea_ann.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#!/usr/bin/env python
import os
import pandas as pd
import numpy as np
import xarray as xr
import warnings
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import optimizers
from tensorflow.keras import utils
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings("ignore")
from constants import *
from plot_utils import plot_learning_curves
'''
Train a Multilayer Perceptron (MLP) Neural Network model
- The model predicts the surface rain rate (RR) given a set of 13 channels of Brightness Temperature (TB) measurements
- The model represents a simple Neural Network (NN) with two hidden layers and a final layer which is used to predict the target label.
The comments that explain the process in this source code are taken from:
https://github.com/ecmwf-projects/mooc-machine-learning-weather-climate
The dataset used for training and test of the NN is built from 10 GMI orbits (from 9th March 2014):
- The 13 features correspond exactly to the 13 GMI channels.
- The rainfall rate represents the target variable and has been obtained from the NASA GMI/DPR Level 2 precipitation product (2B-CMB).
This algorithm combines GMI measurements with the reflectivity profiles measured by the Dual-frequency Precipitation Radar (DPR) on board the GPM Core Observatory.
The DPR is the first spaceborne radar operating at both Ka and Ku bands and provides vertical profiles of liquid and solid precipitation microphysics, and precipitation rate near the surface.
Some details on the 2B-CMB algorithm can be found in GMI/DPR Level 2 Algorithm Theoretical Basis Document (ATBD):
https://gpm.nasa.gov/resources/documents/gpmdpr-level-2-algorithm-theoretical-basis-document-atbd
The GMI TBs and the DPR rainfall rates (from 2B-CMB) have been matched to build the dataset using a nearest neighbour approach.
Only pixels over ocean and sea where rainfall has been observed (2B-CMB rainfall rate > 0 mm/h) are selected to build the dataset (for a total of 61,906 pixels).
The training dataset is built from 10 orbits of March 2014.
'''
# --- model hyperparameters ---
# learning rate: size of the step taken at each iteration while moving
# toward a minimum of the loss function
LEARNING_RATE = 1e-3
# epochs: one epoch is one complete pass of the training dataset through the algorithm
EPOCHS = 1_600
# batch size: number of training examples used in one iteration;
# a larger batch should make the training faster but may saturate the memory
BATCH_SIZE = 8_000
# Create the models directory if it doesn't exist.
# exist_ok=True replaces the separate "exists?" check, so there is no window
# in which another process can create the directory between check and creation.
os.makedirs(MODELS_DIR, exist_ok=True)
# splitting the dataset
def split_dataset(dataset, label_dataset):
    '''Split the features and the labels into interleaved training and test sets.

    Observations at odd positions (1, 3, 5, ...) go to the training set and
    observations at even positions (0, 2, 4, ...) go to the test set, which
    gives a ~50% split (chosen because the dataset is quite small).

    dataset:       the features ('X'), one observation per row
    label_dataset: the target labels ('y'), aligned row by row with dataset

    Both arguments must support len() and boolean-mask indexing.

    Returns X_train, X_test, y_train, y_test.
    '''
    # True for even positions in the observations sequence.
    # The mask is sized from the argument itself, not from a module-level
    # variable, so the function works for any input passed by the caller.
    is_even = np.arange(len(dataset)) % 2 == 0
    is_odd = ~is_even

    X_train = dataset[is_odd]
    X_test = dataset[is_even]
    y_train = label_dataset[is_odd]
    y_test = label_dataset[is_even]

    # return the split dataset
    return X_train, X_test, y_train, y_test
# training phase with the training dataset
def train():
    '''Build, train and save the MLP that predicts the rain rate from the TBs.

    Model architecture (kept simple given the small training dataset):
    - first hidden layer: 20 perceptrons, sigmoid activation
    - second hidden layer: 10 perceptrons, sigmoid activation
    - output layer: 1 perceptron, linear activation
    The mean squared error (MSE) is the loss function to be minimised and the
    mean absolute error (MAE) is displayed for each training iteration (epoch).

    The function takes no arguments: it reads the module-level variables
    input_shape, X_train_scaled, y_train, X_test_scaled and y_test, the
    hyperparameters LEARNING_RATE, BATCH_SIZE and EPOCHS, and the output
    location MODELS_DIR / MODEL_FILENAME. They must be defined before the call.

    Returns:
        (model, history): the fitted model and the object returned by model.fit()
    '''
    # here the network architecture is defined: a feed forward neural network
    # with 2 hidden layers, 20 perceptrons in the first and 10 in the second;
    # sigmoids are used as transfer function in both hidden layers
    model = Sequential()
    model.add(Dense(20, input_dim=input_shape, kernel_initializer='normal', activation='sigmoid'))  # first hidden layer
    model.add(Dense(10, kernel_initializer='normal', activation='sigmoid'))  # second hidden layer
    model.add(Dense(1, kernel_initializer='normal', activation='linear'))  # output
    model.summary()

    # the optimizer is the algorithm used for the training.
    # Adam is a standard choice; stochastic gradient descent (SGD) is an alternative:
    #   optimizer = optimizers.SGD(learning_rate=LEARNING_RATE)
    # 'learning_rate' is the supported keyword; the old 'lr' alias is deprecated
    # and rejected by recent Keras versions
    optimizer = optimizers.Adam(learning_rate=LEARNING_RATE)

    # here the model optimizer and the loss function to be minimized during
    # training (mean squared error, MSE) are defined;
    # the mean absolute error (mae) is also computed as additional metric
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])

    # the training dataset, the batch size and the number of epochs are defined here;
    # validation is also carried out, monitoring loss and metrics on the test
    # dataset at the end of each epoch
    history = model.fit(
        X_train_scaled,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=(X_test_scaled, y_test),
    )

    # the model is saved at the end of the training phase
    # (presumably as an HDF5 file, depending on the extension of MODEL_FILENAME)
    model.save(f'{MODELS_DIR}/{MODEL_FILENAME}')

    # return the model and the training history
    return model, history
# path of the nc data file with training data (TBs) and target labels (surface rain rates)
# the training dataset is built from 10 orbits of March 2014
data_filepath = f'{DATA_DIR}/{DATA_FILENAME_GMI_DPR_RR}'

# read the dataset; the context manager closes the file as soon as the
# dataframes have been extracted
with xr.open_dataset(data_filepath) as ds:
    # the training data (the TBs)
    train_df = ds['tb'].to_dataframe().unstack()
    # the target labels (the surface rain rate)
    target = ds['rr'].to_dataframe()

# the amount of data that we're dealing with
print('The shape of the TB features data is', train_df.shape)
print('The shape of the surface rain rate label data is', target.shape)

# convert the dataframes into tensors
# np.float was only an alias of the builtin float (64-bit) and has been removed
# from NumPy (1.24+); tf.float64 is the same dtype spelled explicitly
tensor_df = tf.convert_to_tensor(train_df, dtype=tf.float64)
label_df = tf.convert_to_tensor(target, dtype=tf.float64)

# split features and labels into training and test datasets
X_train, X_test, y_train, y_test = split_dataset(tensor_df, label_df)
# scaling: standardize the features by removing the mean and scaling to unit variance;
# mean and variance are estimated on the training dataset only ...
scaler = StandardScaler().fit(X_train)
# ... and then applied to both the training and the test dataset
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# report the result of splitting the dataset
print('The shape of the training dataset is', X_train.shape)
print('The shape of the test dataset is', X_test.shape)

# number of input features, read by train() when building the first layer
input_shape = X_train.shape[1]
print(f'Feature shape, i.e. number of TB channels: {input_shape}')

# run the training (original note: CPU, slower);
# for training with GPU (faster) replace the call below with the next 2 lines:
#with tf.device("/device:GPU:0"):
#    model, history = train()
model, history = train()

# plot the training's learning curve
# optional arguments left disabled, as in the original:
#   show = False,
#   filepath = "figures/fig6_ann_sea_learning_curves.png"
plot_learning_curves(history)