64
64
# ' args = list(list(
65
65
# ' detect_negatives = TRUE,
66
66
# ' detection_multiplier = 2.5,
67
- # ' seasonal_period = NULL
67
+ # ' seasonal_period = 7,
68
+ # ' seasonal_as_residual = TRUE
68
69
# ' )),
69
- # ' abbr = "stl_nonseasonal "
70
+ # ' abbr = "stl_reseasonal "
70
71
# ' )
71
72
# ' )
72
73
# '
@@ -216,18 +217,28 @@ detect_outlr_rm <- function(x = seq_along(y), y, n = 21,
216
217
# ' @param n_trend Number of time steps to use in the rolling window for trend.
217
218
# ' Default is 21.
218
219
# ' @param n_seasonal Number of time steps to use in the rolling window for
219
- # ' seasonality. Default is 21.
220
+ # ' seasonality. Default is 21. Can also be the string "periodic". See
221
+ # ' `s.window` in [`stats::stl`].
220
222
# ' @param n_threshold Number of time steps to use in rolling window for the IQR
221
223
# ' outlier thresholds.
222
- # ' @param seasonal_period Integer specifying period of seasonality. For example,
223
- # ' for daily data, a period 7 means weekly seasonality. The default is `NULL`,
224
- # ' meaning that no seasonal term will be included in the STL decomposition.
224
+ # ' @param seasonal_period Integer specifying period of "seasonality". For
225
+ # ' example, for daily data, a period 7 means weekly seasonality. It must be
226
+ # ' strictly larger than 1. Also impacts the size of the low-pass filter
227
+ # ' window; see `l.window` in [`stats::stl`].
228
+ # ' @param seasonal_as_residual Boolean specifying whether the seasonal(/weekly)
229
+ # ' component should be treated as part of the residual component instead of as
230
+ # ' part of the predictions. The default, `FALSE`, treats them as part of the
231
+ # ' predictions, so large seasonal(/weekly) components will not lead to
232
+ # ' flagging points as outliers. `TRUE` may instead consider the extrema of
233
+ # ' large seasonal variations to be outliers; `n_seasonal` and
234
+ # ' `seasonal_period` will still have an impact on the result, though, by
235
+ # ' impacting the estimation of the trend component.
225
236
# ' @template outlier-detection-options
226
237
# ' @template detect-outlr-return
227
238
# '
228
- # ' @details The STL decomposition is computed using the `feasts` package . Once
239
+ # ' @details The STL decomposition is computed using [`stats::stl()`]. Once
229
240
# ' computed, the outlier detection method is analogous to the rolling median
230
- # ' method in `detect_outlr_rm()`, except with the fitted values and residuals
241
+ # ' method in [`detect_outlr_rm()`], except with the fitted values and residuals
231
242
# ' from the STL decomposition taking the place of the rolling median and
232
243
# ' residuals to the rolling median, respectively.
233
244
# '
@@ -252,12 +263,34 @@ detect_outlr_stl <- function(x = seq_along(y), y,
252
263
n_trend = 21 ,
253
264
n_seasonal = 21 ,
254
265
n_threshold = 21 ,
255
- seasonal_period = NULL ,
266
+ seasonal_period ,
267
+ seasonal_as_residual = FALSE ,
256
268
log_transform = FALSE ,
257
269
detect_negatives = FALSE ,
258
270
detection_multiplier = 2 ,
259
271
min_radius = 0 ,
260
272
replacement_multiplier = 0 ) {
273
+ if (dplyr :: n_distinct(x ) != length(y )) {
274
+ cli_abort(" `x` contains duplicate values. (If being run on a column in an
275
+ `epi_df`, did you group by relevant key variables?)" )
276
+ }
277
+ if (length(y ) < = 1L ) {
278
+ cli_abort(" `y` has length {length(y)}; that's definitely too little for
279
+ STL. (If being run in a `mutate()` or `epi_slide()`, check
280
+ whether you grouped by too many variables; you should not be
281
+ grouping by `time_value` in particular.)" )
282
+ }
283
+ distinct_x_skips <- unique(diff(x ))
284
+ if (diff(range(distinct_x_skips )) > 1e-4 * mean(distinct_x_skips )) {
285
+ cli_abort(" `x` does not appear to have regular spacing; consider filling in
286
+ gaps with imputed values (STL does not allow NAs)." )
287
+ }
288
+ if (is.unsorted(x )) { # <- for performance in common (sorted) case
289
+ o <- order(x )
290
+ x <- x [o ]
291
+ y <- y [o ]
292
+ }
293
+
261
294
# Transform if requested
262
295
if (log_transform ) {
263
296
# Replace all negative values with 0
@@ -266,32 +299,22 @@ detect_outlr_stl <- function(x = seq_along(y), y,
266
299
y <- log(y + offset )
267
300
}
268
301
269
- # Make a tsibble for fabletools, setup and run STL
270
- z_tsibble <- tsibble :: tsibble(x = x , y = y , index = x )
271
-
272
- stl_formula <- y ~ trend(window = n_trend ) +
273
- season(period = seasonal_period , window = n_seasonal )
302
+ assert_int(seasonal_period , lower = 2L )
303
+ assert_logical(seasonal_as_residual , len = 1L , any.missing = FALSE )
274
304
275
- stl_components <- z_tsibble %> %
276
- fabletools :: model(feasts :: STL(stl_formula , robust = TRUE )) %> %
277
- generics :: components() %> %
305
+ yts <- stats :: ts(y , frequency = seasonal_period )
306
+ stl_comp <- stats :: stl(yts ,
307
+ t.window = n_trend , s.window = n_seasonal ,
308
+ robust = TRUE
309
+ )$ time.series %> %
278
310
tibble :: as_tibble() %> %
279
- dplyr :: select(.data $ trend : .data $ remainder ) %> % #
280
- dplyr :: rename_with(~ " seasonal" , tidyselect :: starts_with(" season" )) %> %
281
311
dplyr :: rename(resid = .data $ remainder )
282
312
283
313
# Allocate the seasonal term from STL to either fitted or resid
284
- if (! is.null(seasonal_period )) {
285
- stl_components <- stl_components %> %
286
- dplyr :: mutate(
287
- fitted = .data $ trend + .data $ seasonal
288
- )
314
+ if (! seasonal_as_residual ) {
315
+ stl_comp <- dplyr :: mutate(stl_comp , fitted = .data $ trend + .data $ seasonal )
289
316
} else {
290
- stl_components <- stl_components %> %
291
- dplyr :: mutate(
292
- fitted = .data $ trend ,
293
- resid = .data $ seasonal + resid
294
- )
317
+ stl_comp <- dplyr :: mutate(stl_comp , fitted = .data $ trend , resid = .data $ seasonal + .data $ resid )
295
318
}
296
319
297
320
# Detect negatives if requested
@@ -306,10 +329,7 @@ detect_outlr_stl <- function(x = seq_along(y), y,
306
329
307
330
# Calculate lower and upper thresholds and replacement value
308
331
z <- z %> %
309
- dplyr :: mutate(
310
- fitted = stl_components $ fitted ,
311
- resid = stl_components $ resid
312
- ) %> %
332
+ dplyr :: mutate(fitted = stl_comp $ fitted , resid = stl_comp $ resid ) %> %
313
333
roll_iqr(
314
334
n = n_threshold ,
315
335
detection_multiplier = detection_multiplier ,
@@ -337,7 +357,12 @@ roll_iqr <- function(z, n, detection_multiplier, min_radius,
337
357
as_type <- as.numeric
338
358
}
339
359
340
- epi_slide(z , roll_iqr = stats :: IQR(resid ), before = floor((n - 1 ) / 2 ), after = ceiling((n - 1 ) / 2 )) %> %
360
+ z %> %
361
+ epi_slide(
362
+ roll_iqr = stats :: IQR(resid ),
363
+ before = floor((n - 1 ) / 2 ),
364
+ after = ceiling((n - 1 ) / 2 )
365
+ ) %> %
341
366
dplyr :: mutate(
342
367
lower = pmax(
343
368
min_lower ,
0 commit comments