-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadvancedTechniques.ml
246 lines (222 loc) · 9.36 KB
/
advancedTechniques.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
(* General *)
(* Desc: Calculate the mean of the values in a float array.
flaot array -> float -> float *)
let meanArr arr n = (Array.fold_left (+.) 0. arr)/. n;;
(* Desc: Calculate the Standard Deviation of the values in an array.
float array -> float -> float -> float *)
let sdArr arr mu n =
let numa = Array.map (fun xi -> (xi-.mu)**2. ) arr in
let sum = Array.fold_left (+.) 0. numa in
sqrt((sum)/.n);;
(* Desc: The Negative Log Likelihood cost function
float array -> float -> float -> float -> float
*)
let negativeLogLikelihood y mu s n =
let insum = let den = 2.*. s**2. in
Array.map (fun yi -> ((yi-.mu)**2.)/.den ) y in
let suma = Array.fold_left (+.) 0. insum in
n*.(log10(s) +. 0.3990899342) +. suma;;
(* Desc: Scwartz Information Criterion penalty function. Used to avoid over fitting *)
let sicPenalty p n m =
p*.log10(float_of_int n)*.(float_of_int m);;
(* Desc: Split an array into a list of array given a list of location to break the array.
*)
let getSegments arr tau n =
let rec aux acc pre = function
| [] -> [arr]
| hd::[] -> List.rev ((Array.sub arr hd (n-hd))::(Array.sub arr pre (hd-pre))::acc)
| hd::tl -> aux ((Array.sub arr pre (hd-pre))::acc) hd tl (* should be hd+1;hd+2? *)
in aux [] 0 tau;;
(* Desc: Fit the error of a vector using the negative log likelihood. *)
let fitError arr =
let n= float_of_int(Array.length arr) in
let mean = meanArr arr n in
let sd = sdArr arr mean n in
2.*.( negativeLogLikelihood arr mean sd n );;
(* Desc: Fit the error of a list of segements (arrays). Twice the negative log likelihood is
used as the cost functio and no penalty is used. Must be applied externally.
float array list -> float
TODO: abstract the cost function. Optionally add a penalty function as input.
*)
let fitListError lst =
let rec aux error = function
| [] -> error
| hd::tl -> aux (error+.(fitError hd)) tl
in aux 0. lst;;
(*
Desc: Use the binary segmentation algorithm to partition a vector and get
list of tuples with the mean value of the segment and its length.
Input:
y: The time series (An array of floats)
costFun: The cost function to be executed, must take an array of floats
betaFun: The penalty function, must take an integer (depth of segment)
*)
let binarySegmentation y costFun betaFun =
let n = Array.length y in
let firstChnk = costFun y in
let rec aux arr n costChnk sB l_i =
let beta = betaFun(sB) in
let nf = float_of_int n in
let rec exhaust i cost_1 i_1 =
if i >= n then (
let costL, costR = (costFun (Array.sub arr 0 i_1) ), (costFun (Array.sub arr i_1 (n-i_1)) ) in
if cost_1 < costChnk then
(aux (Array.sub arr 0 i_1) i_1 costL (sB+1) (l_i) )@(
aux (Array.sub arr i_1 (n-i_1)) (n-i_1) costR (sB+1) (l_i+i_1))
else [(n,meanArr arr nf)])
else
let costL, costR = (costFun (Array.sub arr 0 i) ), (costFun (Array.sub arr i (n-i)) ) in
let cost = costL +. costR +. beta in
let nCost, ni = (if cost < cost_1 then (cost,i) else (cost_1,i_1)) in
exhaust (i+1) nCost ni
in exhaust 1 costChnk 0
in aux y n firstChnk 0 0 ;;
(* Desc: Calculate the change in magnitude of the values in a list of floats *)
let difList lst =
let rec aux acc = function
| [] -> []
| hd::[] -> List.rev acc
| hd1::hd2::tl -> aux ((hd2-.hd1)::acc) (hd2::tl)
in aux [] lst;;
(* Desc: Get the position of the maximum value in a list.
Error on empty list. *)
let maxPos lst =
let head::tail = lst in
let rec aux max n i = function
| [] -> n
| hd::[] -> if hd > max then i else n
| hd::tl -> if hd > max then aux hd i (i+1) tl else aux max n (i+1) tl
in aux head 0 1 tail;;
(* Desc: Mean Change Point Feature Extraction
Result: An array of features => [|maxJumpSize; minJumpSize; maxLongestRunPct;
meanRunLength; stableProp; entropyRuns; meanLongestRun; |] *)
let meanChangePoint segLst =
let features = Array.make 5 0.0 in
let (length,localMean) = List.split segLst in
let dif = difList localMean in
Array.set features 0 (List.fold_left (max) neg_infinity dif);
Array.set features 1 (List.fold_left (min) infinity dif);
let totalLength = List.fold_left (+) 0 length in
let maxLength = List.fold_left (max) 0 length in
Array.set features 2 ((float_of_int maxLength)/. (float_of_int totalLength)) ;
Array.set features 3 (List.nth localMean (maxPos length));
Array.set features 4 (float_of_int (List.length segLst));
features;;
(* Desc: Internal Funtion. Calculates the iner sum of the naive DFT *)
let complexInSum n a kf nF=
let num = {Complex.re=0.; im=(-6.283185307) } in
let num2 = {Complex.re=((float_of_int n)*.kf); im=0. } in
let a = {Complex.re=a; im=0. } in
let nF = {Complex.re=nF; im=0. } in
Complex.mul a (Complex.exp (Complex.mul (Complex.div num nF) num2) );;
(* Get the normalized Fourier Transform of a time series, array of floats.
*)
let discreteFourierTransform timeSeries =
let nN = Array.length timeSeries in
let nF = float_of_int nN in
let ft = Array.make nN ({Complex.re=0.; im=0.}) in
for k = 0 to (nN - 1) do
let xk =
let kf = float_of_int k in
let inSum = Array.mapi (fun n a -> complexInSum n a kf nF) timeSeries in
Array.fold_left (Complex.add) ({Complex.re=0.; im=0.}) inSum in
Array.set ft k xk
done;
let result = Array.map (fun a -> (2.*.(Complex.norm a)/. nF) ) ft in
Array.set result 0 (result.(0) /. 2.);
result;;
(* Desc: Extract magnitude and proportion features for up to nH-1 harmonics
from an array of floats.
TODO: Add 15th harmonic if n >15.
*)
let fourierFeatures arr nH =
let wArr = Array.sub arr 0 nH in
let feats = Array.make (nH*2) 0. in
(* Add the absolute magnitud of the first nH values *)
Array.iteri (fun i a -> Array.set feats i a) wArr;
(* Relative amplitude of the nH-1 Harmonics *)
let zeroFreq = wArr.(0) in
Array.iteri (fun i a -> if i>0 then Array.set feats (i+nH-1) (a/.zeroFreq) ) wArr;
(* Add the harmonic with the highest magnitud to the features *)
Array.set wArr 0 0.;
let fundMag = Array.fold_left (max) neg_infinity wArr in
Array.iteri (fun i a -> if a=fundMag then Array.set feats (nH*2 -1) (float_of_int i) ) wArr ;
feats;;
(* Desc: return a dyadic array with length = 2^J for some integer J
and J
TODO: Correct to spread the returned values over the original
*)
let makeDyadic timeSeries =
let vJ = floor (log (float_of_int (Array.length timeSeries)) /. log 2.) in
(vJ, Array.sub timeSeries 0 (int_of_float (2.** vJ)));;
(* Desc: Return the running sum of an array of floats.
*)
let getRunSum arr =
let n = Array.length arr in
let result = Array.make n 0. in
Array.set result 0 (arr.(0));
for i = 1 to (n-1) do
Array.set result i (result.(i-1) +. arr.(i))
done;
result;;
(* Desc: Partition a daydic array in 2^j points per partition.
*)
let partition timeSeries j =
let width = int_of_float (2.**(float_of_int j)) in
let nboxes = (Array.length timeSeries)/width in
let rec aux acc n =
if n>0 then aux ((Array.sub timeSeries ((n-1)*width) width )::acc) (n-1)
else acc
in aux [] nboxes;;
(* Desc: Calculate the slope and intercept using the OLS for a single explanatory variable. *)
let simpleLinearRegression x y =
let n,m = Array.length x, Array.length y in
(if n != m then raise End_of_file); (*make own exception*)
let n = float_of_int n in
let xMean, yMean = meanArr x n, meanArr y n in
let xNum = Array.map (fun xi -> xi -. xMean ) x in
let beta1Num = Array.mapi (fun i a -> a*.(y.(i)-.yMean)) xNum in
let beta1Den = Array.fold_left (+.) 0. (Array.map (fun xi -> xi**2.) xNum) in
let beta1 = (Array.fold_left (+.) 0. beta1Num)/.beta1Den in
let beta0 = yMean -. beta1*.xMean in
(beta1, beta0);;
(* Desc: Calculate the squared *)
let squaredError x y model =
let a, b = model in
Array.mapi (fun i yi -> (yi-. (x.(i)*.a +. b))**2. ) y;;
(* Desc: Make an array with the numbers from 0 to n. *)
let mkArr n =
let nI = int_of_float n in
let result = Array.make nI nan in
for i = 0 to (nI-1) do
Array.set result i (float_of_int i)
done;
result;;
(* Desc: Detrended Fluctuation Analysis of a time series. *)
let detrendedFluctuationAnalysis timeSeries =
let rJ,wArr = makeDyadic timeSeries in
let mean = meanArr wArr (2.**rJ) in
let wArr = Array.map (fun yi -> yi -. mean) wArr in (*? Array.map (fun a -> a/.mean) wArr ?*)
let runningSum = getRunSum wArr in
let stdRes = Array.make (int_of_float (rJ -. 2.)) 0. in
for j= 2 to ((int_of_float rJ) - 1) do
let partLst = partition runningSum j in
let x = mkArr (2.**(float_of_int j)) in
let fits = List.map (fun y -> simpleLinearRegression x y) partLst in
let sqrErrLst = List.map2 (fun y b -> squaredError x y b) partLst fits in
let mSE = meanArr (List.fold_left (Array.append) [||] sqrErrLst) (2.**rJ) in
Array.set stdRes (j-2) (sqrt mSE)
done;
let fx = Array.mapi (fun i a -> log(2.**(float_of_int (i+2))) ) stdRes in
let logRes = Array.map (log) stdRes in
let alpha,_ = simpleLinearRegression fx logRes in
alpha;;
(* General TODO:
Maybe change module name to structureAnalysis
Mean Change Point Modeling TODO:
TODO: Pruned Exact Linear Time (PELT) algorithm
Feature Extraction:
meanRunLength
stableProp
entropyRuns
*)