@@ -551,6 +551,164 @@ get_before_after_from_window <- function(window_size, align, time_type) {
551
551
return (list (before = before , after = after ))
552
552
}
553
553
554
+
555
+ # ' Information about upstream (`{data.table}`/`{slider}`) slide functions
556
+ # '
557
+ # ' Underlies [`upstream_slide_f_info`].
558
+ # '
559
+ # ' @keywords internal
560
+ upstream_slide_f_possibilities <- tibble :: tribble(
561
+ ~ f , ~ package , ~ namer ,
562
+ frollmean , " data.table" , ~ if (is.logical(.x )) " prop" else " av" ,
563
+ frollsum , " data.table" , ~ if (is.logical(.x )) " count" else " sum" ,
564
+ frollapply , " data.table" , ~ " slide" ,
565
+ slide_sum , " slider" , ~ if (is.logical(.x )) " count" else " sum" ,
566
+ slide_prod , " slider" , ~ " prod" ,
567
+ slide_mean , " slider" , ~ if (is.logical(.x )) " prop" else " av" ,
568
+ slide_min , " slider" , ~ " min" ,
569
+ slide_max , " slider" , ~ " max" ,
570
+ slide_all , " slider" , ~ " all" ,
571
+ slide_any , " slider" , ~ " any" ,
572
+ )
573
+
574
+ # ' Validate & get information about an upstream slide function
575
+ # '
576
+ # ' @param .f function such as `data.table::frollmean` or `slider::slide_mean`;
577
+ # ' must appear in [`upstream_slide_f_possibilities`]
578
+ # ' @return named list with two elements: `from_package`, a string containing the
579
+ # ' upstream package name ("data.table" or "slider"), and `namer`, a function
580
+ # ' that takes a column to call `.f` on and outputs a basic name or
581
+ # ' abbreviation for what operation `.f` represents on that kind of column
582
+ # ' (e.g., "sum", "av", "count").
583
+ # '
584
+ # ' @keywords internal
585
+ upstream_slide_f_info <- function (.f ) {
586
+ # Check that slide function `.f` is one of those short-listed from
587
+ # `data.table` and `slider` (or a function that has the exact same definition,
588
+ # e.g. if the function has been reexported or defined locally). Extract some
589
+ # metadata. `namer` will be mapped over columns (.x will be a column, not the
590
+ # entire edf).
591
+ f_info_row <- upstream_slide_f_possibilities %> %
592
+ filter(map_lgl(.data $ f , ~ identical(.f , .x )))
593
+ if (nrow(f_info_row ) == 0L ) {
594
+ # `f` is from somewhere else and not supported
595
+ cli_abort(
596
+ c(
597
+ " problem with {rlang::expr_label(rlang::caller_arg(f))}" ,
598
+ " i" = " `f` must be one of `data.table`'s rolling functions (`frollmean`,
599
+ `frollsum`, `frollapply`. See `?data.table::roll`) or one of
600
+ `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
601
+ etc. See `?slider::\` summary-slide\` ` for more options)."
602
+ ),
603
+ class = " epiprocess__epi_slide_opt__unsupported_slide_function" ,
604
+ epiprocess__f = .f
605
+ )
606
+ }
607
+ if (nrow(f_info_row ) > 1L ) {
608
+ cli_abort(' epiprocess internal error: looking up `.f` in table of possible
609
+ functions yielded multiple matches. Please report it using "New
610
+ issue" at https://github.com/cmu-delphi/epiprocess/issues, using
611
+ reprex::reprex to provide a minimal reproducible example.' )
612
+ }
613
+ f_from_package <- f_info_row $ package
614
+ list (
615
+ from_package = f_from_package ,
616
+ namer = unwrap(f_info_row $ namer )
617
+ )
618
+ }
619
+
620
+ # ' Calculate input and output column names for an `{epiprocess}` [`dplyr::across`]-like operations
621
+ # '
622
+ # ' @param .x data.frame to perform input column tidyselection on
623
+ # ' @param time_type as in [`new_epi_df`]
624
+ # ' @param col_names_quo enquosed input column tidyselect expression
625
+ # ' @param .f_namer function taking an input column object and outputting a name
626
+ # ' for a corresponding output column; see [`upstream_slide_f_info`]
627
+ # ' @param .window_size as in [`epi_slide_opt`]
628
+ # ' @param .align as in [`epi_slide_opt`]
629
+ # ' @param .prefix as in [`epi_slide_opt`]
630
+ # ' @param .suffix as in [`epi_slide_opt`]
631
+ # ' @param .new_col_names as in [`epi_slide_opt`]
632
+ # ' @return named list with two elements: `input_col_names`, chr, subset of
633
+ # ' `names(.x)`; and `output_colnames`, chr, same length as `input_col_names`
634
+ # '
635
+ # ' @keywords internal
636
+ across_ish_names_info <- function (.x , time_type , col_names_quo , .f_namer , .window_size , .align , .prefix , .suffix , .new_col_names ) {
637
+ # The position of a given column can be differ between input `.x` and
638
+ # `.data_group` since the grouping step by default drops grouping columns.
639
+ # To avoid rerunning `eval_select` for every `.data_group`, convert
640
+ # positions of user-provided `col_names` into string column names. We avoid
641
+ # using `names(pos)` directly for robustness and in case we later want to
642
+ # allow users to rename fields via tidyselection.
643
+ pos <- eval_select(col_names_quo , data = .x , allow_rename = FALSE )
644
+ input_col_names <- names(.x )[pos ]
645
+
646
+ # Handle output naming
647
+ if ((! is.null(.prefix ) || ! is.null(.suffix )) && ! is.null(.new_col_names )) {
648
+ cli_abort(
649
+ " Can't use both .prefix/.suffix and .new_col_names at the same time." ,
650
+ class = " epiprocess__epi_slide_opt_incompatible_naming_args"
651
+ )
652
+ }
653
+ assert_string(.prefix , null.ok = TRUE )
654
+ assert_string(.suffix , null.ok = TRUE )
655
+ assert_character(.new_col_names , len = length(input_col_names ), null.ok = TRUE )
656
+ if (is.null(.prefix ) && is.null(.suffix ) && is.null(.new_col_names )) {
657
+ .suffix <- " _{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
658
+ # ^ does not account for any arguments specified to underlying functions via
659
+ # `...` such as `na.rm =`, nor does it distinguish between functions from
660
+ # different packages accomplishing the same type of computation. Those are
661
+ # probably only set one way per task, so this probably produces cleaner
662
+ # names without clashes (though maybe some confusion if switching between
663
+ # code with different settings).
664
+ }
665
+ if (! is.null(.prefix ) || ! is.null(.suffix )) {
666
+ .prefix <- .prefix %|| % " "
667
+ .suffix <- .suffix %|| % " "
668
+ if (identical(.window_size , Inf )) {
669
+ n <- " running_"
670
+ time_unit_abbr <- " "
671
+ align_abbr <- " "
672
+ } else {
673
+ n <- time_delta_to_n_steps(.window_size , time_type )
674
+ time_unit_abbr <- time_type_unit_abbr(time_type )
675
+ align_abbr <- c(right = " " , center = " c" , left = " l" )[[.align ]]
676
+ }
677
+ glue_env <- rlang :: env(
678
+ .n = n ,
679
+ .time_unit_abbr = time_unit_abbr ,
680
+ .align_abbr = align_abbr ,
681
+ .f_abbr = purrr :: map_chr(.x [, c(input_col_names )], .f_namer ), # compat between DT and tbl selection
682
+ quo_get_env(col_names_quo )
683
+ )
684
+ .new_col_names <- unclass(
685
+ glue(.prefix , .envir = glue_env ) +
686
+ input_col_names +
687
+ glue(.suffix , .envir = glue_env )
688
+ )
689
+ } else {
690
+ # `.new_col_names` was provided by user; we don't need to do anything.
691
+ }
692
+ if (any(.new_col_names %in% names(.x ))) {
693
+ cli_abort(c(
694
+ " Naming conflict between new columns and existing columns" ,
695
+ " x" = " Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
696
+ ), class = " epiprocess__epi_slide_opt_old_new_name_conflict" )
697
+ }
698
+ if (anyDuplicated(.new_col_names )) {
699
+ cli_abort(c(
700
+ " New column names contain duplicates" ,
701
+ " x" = " Duplicated names: {format_varnames(unique(.new_col_names[duplicated(.new_col_names)]))}"
702
+ ), class = " epiprocess__epi_slide_opt_new_name_duplicated" )
703
+ }
704
+ output_col_names <- .new_col_names
705
+
706
+ return (list (
707
+ input_col_names = input_col_names ,
708
+ output_col_names = output_col_names
709
+ ))
710
+ }
711
+
554
712
# ' Optimized slide functions for common cases
555
713
# '
556
714
# ' @description `epi_slide_opt` allows sliding an n-timestep [data.table::froll]
@@ -750,59 +908,12 @@ epi_slide_opt <- function(
750
908
# Check for duplicated time values within groups
751
909
assert(check_ukey_unique(ungroup(.x ), c(group_vars(.x ), " time_value" )))
752
910
753
- # The position of a given column can be differ between input `.x` and
754
- # `.data_group` since the grouping step by default drops grouping columns.
755
- # To avoid rerunning `eval_select` for every `.data_group`, convert
756
- # positions of user-provided `col_names` into string column names. We avoid
757
- # using `names(pos)` directly for robustness and in case we later want to
758
- # allow users to rename fields via tidyselection.
911
+ # Validate/process .col_names, .f:
759
912
col_names_quo <- enquo(.col_names )
760
- pos <- eval_select(col_names_quo , data = .x , allow_rename = FALSE )
761
- col_names_chr <- names(.x )[pos ]
762
-
763
- # Check that slide function `.f` is one of those short-listed from
764
- # `data.table` and `slider` (or a function that has the exact same definition,
765
- # e.g. if the function has been reexported or defined locally). Extract some
766
- # metadata. `namer` will be mapped over columns (.x will be a column, not the
767
- # entire edf).
768
- f_possibilities <-
769
- tibble :: tribble(
770
- ~ f , ~ package , ~ namer ,
771
- frollmean , " data.table" , ~ if (is.logical(.x )) " prop" else " av" ,
772
- frollsum , " data.table" , ~ if (is.logical(.x )) " count" else " sum" ,
773
- frollapply , " data.table" , ~ " slide" ,
774
- slide_sum , " slider" , ~ if (is.logical(.x )) " count" else " sum" ,
775
- slide_prod , " slider" , ~ " prod" ,
776
- slide_mean , " slider" , ~ if (is.logical(.x )) " prop" else " av" ,
777
- slide_min , " slider" , ~ " min" ,
778
- slide_max , " slider" , ~ " max" ,
779
- slide_all , " slider" , ~ " all" ,
780
- slide_any , " slider" , ~ " any" ,
781
- )
782
- f_info <- f_possibilities %> %
783
- filter(map_lgl(.data $ f , ~ identical(.f , .x )))
784
- if (nrow(f_info ) == 0L ) {
785
- # `f` is from somewhere else and not supported
786
- cli_abort(
787
- c(
788
- " problem with {rlang::expr_label(rlang::caller_arg(f))}" ,
789
- " i" = " `f` must be one of `data.table`'s rolling functions (`frollmean`,
790
- `frollsum`, `frollapply`. See `?data.table::roll`) or one of
791
- `slider`'s specialized sliding functions (`slide_mean`, `slide_sum`,
792
- etc. See `?slider::\` summary-slide\` ` for more options)."
793
- ),
794
- class = " epiprocess__epi_slide_opt__unsupported_slide_function" ,
795
- epiprocess__f = .f
796
- )
797
- }
798
- if (nrow(f_info ) > 1L ) {
799
- cli_abort(' epiprocess internal error: looking up `.f` in table of possible
800
- functions yielded multiple matches. Please report it using "New
801
- issue" at https://github.com/cmu-delphi/epiprocess/issues, using
802
- reprex::reprex to provide a minimal reproducible example.' )
803
- }
804
- f_from_package <- f_info $ package
913
+ f_info <- upstream_slide_f_info(.f )
914
+ f_from_package <- f_info $ from_package
805
915
916
+ # Validate/process .ref_time_values:
806
917
user_provided_rtvs <- ! is.null(.ref_time_values )
807
918
if (! user_provided_rtvs ) {
808
919
.ref_time_values <- unique(.x $ time_value )
@@ -832,65 +943,10 @@ epi_slide_opt <- function(
832
943
validate_slide_window_arg(.window_size , time_type )
833
944
window_args <- get_before_after_from_window(.window_size , .align , time_type )
834
945
835
- # Handle output naming
836
- if ((! is.null(.prefix ) || ! is.null(.suffix )) && ! is.null(.new_col_names )) {
837
- cli_abort(
838
- " Can't use both .prefix/.suffix and .new_col_names at the same time." ,
839
- class = " epiprocess__epi_slide_opt_incompatible_naming_args"
840
- )
841
- }
842
- assert_string(.prefix , null.ok = TRUE )
843
- assert_string(.suffix , null.ok = TRUE )
844
- assert_character(.new_col_names , len = length(col_names_chr ), null.ok = TRUE )
845
- if (is.null(.prefix ) && is.null(.suffix ) && is.null(.new_col_names )) {
846
- .suffix <- " _{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"
847
- # ^ does not account for any arguments specified to underlying functions via
848
- # `...` such as `na.rm =`, nor does it distinguish between functions from
849
- # different packages accomplishing the same type of computation. Those are
850
- # probably only set one way per task, so this probably produces cleaner
851
- # names without clashes (though maybe some confusion if switching between
852
- # code with different settings).
853
- }
854
- if (! is.null(.prefix ) || ! is.null(.suffix )) {
855
- .prefix <- .prefix %|| % " "
856
- .suffix <- .suffix %|| % " "
857
- if (identical(.window_size , Inf )) {
858
- n <- " running_"
859
- time_unit_abbr <- " "
860
- align_abbr <- " "
861
- } else {
862
- n <- time_delta_to_n_steps(.window_size , time_type )
863
- time_unit_abbr <- time_type_unit_abbr(time_type )
864
- align_abbr <- c(right = " " , center = " c" , left = " l" )[[.align ]]
865
- }
866
- glue_env <- rlang :: env(
867
- .n = n ,
868
- .time_unit_abbr = time_unit_abbr ,
869
- .align_abbr = align_abbr ,
870
- .f_abbr = purrr :: map_chr(.x [col_names_chr ], unwrap(f_info $ namer )),
871
- quo_get_env(col_names_quo )
872
- )
873
- .new_col_names <- unclass(
874
- glue(.prefix , .envir = glue_env ) +
875
- col_names_chr +
876
- glue(.suffix , .envir = glue_env )
877
- )
878
- } else {
879
- # `.new_col_names` was provided by user; we don't need to do anything.
880
- }
881
- if (any(.new_col_names %in% names(.x ))) {
882
- cli_abort(c(
883
- " Naming conflict between new columns and existing columns" ,
884
- " x" = " Overlapping names: {format_varnames(intersect(.new_col_names, names(.x)))}"
885
- ), class = " epiprocess__epi_slide_opt_old_new_name_conflict" )
886
- }
887
- if (anyDuplicated(.new_col_names )) {
888
- cli_abort(c(
889
- " New column names contain duplicates" ,
890
- " x" = " Duplicated names: {format_varnames(unique(.new_col_names[duplicated(.new_col_names)]))}"
891
- ), class = " epiprocess__epi_slide_opt_new_name_duplicated" )
892
- }
893
- result_col_names <- .new_col_names
946
+ # Handle output naming:
947
+ names_info <- across_ish_names_info(.x , time_type , col_names_quo , f_info $ namer , .window_size , .align , .prefix , .suffix , .new_col_names )
948
+ input_col_names <- names_info $ input_col_names
949
+ output_col_names <- names_info $ output_col_names
894
950
895
951
# Make a complete date sequence between min(.x$time_value) and max(.x$time_value).
896
952
date_seq_list <- full_date_seq(.x , window_args $ before , window_args $ after , time_type )
@@ -935,23 +991,23 @@ epi_slide_opt <- function(
935
991
# be; shift results to the left by `after` timesteps.
936
992
if (window_args $ before != Inf ) {
937
993
window_size <- window_args $ before + window_args $ after + 1L
938
- roll_output <- .f(x = .data_group [, col_names_chr ], n = window_size , ... )
994
+ roll_output <- .f(x = .data_group [, input_col_names ], n = window_size , ... )
939
995
} else {
940
996
window_size <- list (seq_along(.data_group $ time_value ))
941
- roll_output <- .f(x = .data_group [, col_names_chr ], n = window_size , adaptive = TRUE , ... )
997
+ roll_output <- .f(x = .data_group [, input_col_names ], n = window_size , adaptive = TRUE , ... )
942
998
}
943
999
if (window_args $ after > = 1 ) {
944
- .data_group [, result_col_names ] <- purrr :: map(roll_output , function (.x ) {
1000
+ .data_group [, output_col_names ] <- purrr :: map(roll_output , function (.x ) {
945
1001
c(.x [(window_args $ after + 1L ): length(.x )], rep(NA , window_args $ after ))
946
1002
})
947
1003
} else {
948
- .data_group [, result_col_names ] <- roll_output
1004
+ .data_group [, output_col_names ] <- roll_output
949
1005
}
950
1006
}
951
1007
if (f_from_package == " slider" ) {
952
- for (i in seq_along(col_names_chr )) {
953
- .data_group [, result_col_names [i ]] <- .f(
954
- x = .data_group [[col_names_chr [i ]]],
1008
+ for (i in seq_along(input_col_names )) {
1009
+ .data_group [, output_col_names [i ]] <- .f(
1010
+ x = .data_group [[input_col_names [i ]]],
955
1011
before = as.numeric(window_args $ before ),
956
1012
after = as.numeric(window_args $ after ),
957
1013
...
@@ -970,7 +1026,7 @@ epi_slide_opt <- function(
970
1026
group_by(!!! .x_orig_groups )
971
1027
972
1028
if (.all_rows ) {
973
- result [! vec_in(result $ time_value , ref_time_values ), result_col_names ] <- NA
1029
+ result [! vec_in(result $ time_value , ref_time_values ), output_col_names ] <- NA
974
1030
} else if (user_provided_rtvs ) {
975
1031
result <- result [vec_in(result $ time_value , ref_time_values ), ]
976
1032
}
0 commit comments