From 82a762aa5a79e1188d90110d075860838e272fb7 Mon Sep 17 00:00:00 2001 From: Floris Kraak Date: Mon, 18 Apr 2016 15:31:37 +0200 Subject: [PATCH 1/2] Add period values for 'sec' and 'ms' granularities, as it appears they are missing. If the granularity detection results in "sec" or "ms" granularity detect_anoms will blow up with a message stating 'period' was not provided. Which is correct. This provides at least some defaults. This is a bit tricky, since the number of samples really isn't guaranteed to be 1 measurement per whatever the 'gran' variable says it is. However, this code appears to assume that. Leading to "fun" when the precision is in ms but the measurements fire a lot less often - say, once per five minutes (that's the testcase in question). There are two ways to deal with this, neither of them implemented by this patch: - Look at the delta between the first and the last timestamp and the number of records and make an educated guess from there. - Add 'period' as a function argument and let the caller decide, using the hardcoded values simply as educated guesses. These approaches can be combined, but that is left as an exercise for the reader. --- R/ts_anom_detection.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/ts_anom_detection.R b/R/ts_anom_detection.R index a7e3abe..b6c6413 100644 --- a/R/ts_anom_detection.R +++ b/R/ts_anom_detection.R @@ -166,11 +166,15 @@ AnomalyDetectionTs <- function(x, max_anoms = 0.10, direction = 'pos', x <- format_timestamp(aggregate(x[2], format(x[1], "%Y-%m-%d %H:%M:00"), eval(parse(text="sum")))) } + ## This is a bit tricky, since the number of samples really isn't guaranteed to be 1 measurement per whatever the 'gran' variable says it is. + ## Either we'll need to do something smarter (look at the delta between the first and the last timestamp and count the number of rows) or + ## alternatively, simply make 'period' a function argument and use these values as defaults. 
period = switch(gran, min = 1440, + ms = 1000, + sec = 60*60, hr = 24, - # if the data is daily, then we need to bump the period to weekly to get multiple examples - day = 7) + day = 7) # if the data is daily, then we need to bump the period to weekly to get multiple examples num_obs <- length(x[[2]]) if(max_anoms < 1/num_obs){ From b58b8bbf0d379a7d5eaad74c2ebe2f9d332ef192 Mon Sep 17 00:00:00 2001 From: Floris Kraak Date: Mon, 18 Apr 2016 15:45:05 +0200 Subject: [PATCH 2/2] Assume 'millisecond' granularity also needs formatting the way 'second' granularity does. --- R/ts_anom_detection.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ts_anom_detection.R b/R/ts_anom_detection.R index b6c6413..3c95c7a 100644 --- a/R/ts_anom_detection.R +++ b/R/ts_anom_detection.R @@ -162,7 +162,7 @@ AnomalyDetectionTs <- function(x, max_anoms = 0.10, direction = 'pos', } # Aggregate data to minutely if secondly - if(gran == "sec"){ + if(gran == "sec" || gran == "ms"){ x <- format_timestamp(aggregate(x[2], format(x[1], "%Y-%m-%d %H:%M:00"), eval(parse(text="sum")))) }