@@ -6,8 +6,9 @@ source("scripts/targets-exploration-common.R")
6
6
hhs_signal <- " confirmed_admissions_covid_1d"
7
7
if (! exists(" ref_time_values_" )) {
8
8
# Alternatively you can let slide_forecaster figure out ref_time_values
9
- start_date <- as.Date(" 2023-10-04 " )
9
+ start_date <- as.Date(" 2023-11-08 " )
10
10
end_date <- as.Date(" 2024-04-24" )
11
+ # end_date <- start_date + 7
11
12
date_step <- 7L
12
13
ref_time_values_ <- seq.Date(start_date , end_date , by = date_step )
13
14
}
@@ -62,12 +63,7 @@ forecaster_parameter_combinations_ <- rlang::list2(
62
63
),
63
64
pop_scaling = FALSE ,
64
65
scale_method = " quantile" ,
65
- center_method = " median" ,
66
- nonlin_method = " quart_root" ,
67
- filter_source = " " ,
68
- filter_agg_level = " " ,
69
- n_training = Inf ,
70
- drop_non_seasons = FALSE ,
66
+ n_training = Inf
71
67
),
72
68
expand_grid(
73
69
forecaster = " scaled_pop" ,
@@ -93,12 +89,7 @@ forecaster_parameter_combinations_ <- rlang::list2(
93
89
),
94
90
pop_scaling = FALSE ,
95
91
scale_method = " quantile" ,
96
- center_method = " median" ,
97
- nonlin_method = " quart_root" ,
98
- filter_source = " " ,
99
- filter_agg_level = " " ,
100
- n_training = Inf ,
101
- drop_non_seasons = FALSE ,
92
+ n_training = Inf
102
93
),
103
94
expand_grid(
104
95
forecaster = " scaled_pop" ,
@@ -124,12 +115,7 @@ forecaster_parameter_combinations_ <- rlang::list2(
124
115
),
125
116
pop_scaling = FALSE ,
126
117
scale_method = " quantile" ,
127
- center_method = " median" ,
128
- nonlin_method = " quart_root" ,
129
- filter_source = " " ,
130
- filter_agg_level = " " ,
131
- n_training = Inf ,
132
- drop_non_seasons = FALSE ,
118
+ n_training = Inf
133
119
)
134
120
),
135
121
scled_pop_season = tidyr :: expand_grid(
@@ -141,7 +127,13 @@ forecaster_parameter_combinations_ <- rlang::list2(
141
127
),
142
128
pop_scaling = FALSE ,
143
129
n_training = Inf ,
144
- seasonal_method = list (c(" covid" ), c(" window" ), c(" covid" , " window" ), c(" climatological" ), c(" climatological" , " window" ))
130
+ seasonal_method = list (
131
+ c(" covid" ),
132
+ c(" window" ),
133
+ c(" covid" , " window" ),
134
+ c(" climatological" ),
135
+ c(" climatological" , " window" )
136
+ )
145
137
)
146
138
) %> %
147
139
map(function (x ) {
@@ -178,16 +170,16 @@ scaled_pop_scaled <- list(
178
170
smooth_scaled <- list (
179
171
forecaster = " smoothed_scaled" ,
180
172
trainer = " quantreg" ,
181
- lags =
182
- # list(smoothed, sd)
183
- list (c(0 , 7 , 14 , 21 , 28 ), c(0 )),
173
+ # lags = list(smoothed, sd)
174
+ lags = list (c(0 , 7 , 14 , 21 , 28 ), c(0 )),
184
175
smooth_width = as.difftime(2 , units = " weeks" ),
185
176
sd_width = as.difftime(4 , units = " weeks" ),
186
177
sd_mean_width = as.difftime(2 , units = " weeks" ),
187
178
pop_scaling = TRUE ,
188
179
n_training = Inf
189
180
)
190
181
# Human-readable object to be used for inspecting the ensembles in the pipeline.
182
+ # fmt: skip
191
183
ensemble_parameter_combinations_ <- tribble(
192
184
~ ensemble , ~ ensemble_args , ~ forecasters ,
193
185
# mean forecaster
@@ -240,7 +232,12 @@ ensemble_parameter_combinations_ <- tribble(
240
232
) %> %
241
233
add_id(exclude = " forecasters" )
242
234
# spoofing ensembles for right now
243
- ensemble_parameter_combinations_ <- tibble :: tibble(id = character (), ensemble = character (), ensemble_args = character (), children_ids = character ())
235
+ ensemble_parameter_combinations_ <- tibble :: tibble(
236
+ id = character (),
237
+ ensemble = character (),
238
+ ensemble_args = character (),
239
+ children_ids = character ()
240
+ )
244
241
# Check that every ensemble dependent is actually included.
245
242
missing_forecasters <- setdiff(
246
243
ensemble_parameter_combinations_ %> % pull(children_ids ) %> % unlist() %> % unique(),
@@ -272,7 +269,7 @@ rlang::list2(
272
269
tar_target(
273
270
name = hhs_archive_data_asof ,
274
271
command = {
275
- get_health_data(as.Date(ref_time_values )) %> %
272
+ get_health_data(as.Date(ref_time_values ), disease = " covid " ) %> %
276
273
mutate(version = as.Date(ref_time_values )) %> %
277
274
relocate(geo_value , time_value , version , hhs )
278
275
},
@@ -348,6 +345,9 @@ rlang::list2(
348
345
# weekly data is indexed from the start of the week
349
346
mutate(time_value = time_value + 6 - time_value_adjust ) %> %
350
347
mutate(version = time_value ) %> %
348
+ # Always convert to data.frame after dplyr operations on data.table.
349
+ # https://github.com/cmu-delphi/epiprocess/issues/618
350
+ as.data.frame() %> %
351
351
as_epi_archive(compactify = TRUE )
352
352
nssp_archive
353
353
}
@@ -380,39 +380,52 @@ rlang::list2(
380
380
geo_type = " hhs" ,
381
381
geo_values = " *"
382
382
)
383
- google_symptoms_archive_min <-
384
- google_symptoms_state_archive %> %
383
+ google_symptoms_archive_min <- google_symptoms_state_archive %> %
385
384
bind_rows(google_symptoms_hhs_archive ) %> %
386
385
select(geo_value , time_value , value ) %> %
387
386
daily_to_weekly() %> %
388
387
mutate(version = time_value ) %> %
389
- as_epi_archive(compactify = TRUE )
390
- google_symptoms_archive_min $ DT %> %
391
388
filter(! is.na(value )) %> %
392
389
relocate(geo_value , time_value , version , value ) %> %
390
+ as.data.frame() %> %
393
391
as_epi_archive(compactify = TRUE )
394
392
})
395
- all_of_them [[1 ]]$ DT %<> % rename(google_symptoms_4_bronchitis = value )
396
- all_of_them [[2 ]]$ DT %<> % rename(google_symptoms_5_ageusia = value )
393
+ all_of_them [[1 ]] <- all_of_them [[1 ]]$ DT %> %
394
+ rename(google_symptoms_4_bronchitis = value ) %> %
395
+ # Always convert to data.frame after dplyr operations on data.table.
396
+ # https://github.com/cmu-delphi/epiprocess/issues/618
397
+ as.data.frame() %> %
398
+ as_epi_archive(compactify = TRUE )
399
+ all_of_them [[2 ]] <- all_of_them [[2 ]]$ DT %> %
400
+ rename(google_symptoms_5_ageusia = value ) %> %
401
+ # Always convert to data.frame after dplyr operations on data.table.
402
+ # https://github.com/cmu-delphi/epiprocess/issues/618
403
+ as.data.frame() %> %
404
+ as_epi_archive(compactify = TRUE )
397
405
google_symptoms_archive <- epix_merge(all_of_them [[1 ]], all_of_them [[2 ]])
398
406
google_symptoms_archive <- google_symptoms_archive $ DT %> %
399
407
mutate(google_symptoms = google_symptoms_4_bronchitis + google_symptoms_5_ageusia ) %> %
408
+ # Always convert to data.frame after dplyr operations on data.table.
409
+ # https://github.com/cmu-delphi/epiprocess/issues/618
410
+ as.data.frame() %> %
400
411
as_epi_archive(compactify = TRUE )
401
- # not just using dplyr to allow for na.rm
402
- google_symptoms_archive $ DT $ google_symptoms <-
403
- rowSums(google_symptoms_archive $ DT [, c(" google_symptoms_4_bronchitis" , " google_symptoms_5_ageusia" )],
404
- na.rm = TRUE
405
- )
406
412
pre_pipeline <- google_symptoms_archive %> %
407
413
epix_as_of(as.Date(" 2023-10-04" )) %> %
408
414
mutate(source = " none" )
409
- colnames <- c(" google_symptoms_4_bronchitis" , " google_symptoms_5_ageusia" , " google_symptoms " )
415
+ colnames <- c(" google_symptoms_4_bronchitis" , " google_symptoms_5_ageusia" )
410
416
for (colname in colnames ) {
411
417
learned_params <- calculate_whitening_params(pre_pipeline , colname = colname )
412
418
google_symptoms_archive $ DT %<> % data_whitening(colname = colname , learned_params , join_cols = " geo_value" )
413
419
}
414
420
google_symptoms_archive $ DT %> %
421
+ mutate(
422
+ google_symptoms = ifelse(is.na(google_symptoms_4_bronchitis ), 0 , google_symptoms_4_bronchitis ) +
423
+ ifelse(is.na(google_symptoms_5_ageusia ), 0 , google_symptoms_5_ageusia )
424
+ ) %> %
415
425
select(- starts_with(" source" )) %> %
426
+ # Always convert to data.frame after dplyr operations on data.table
427
+ # https://github.com/cmu-delphi/epiprocess/issues/618
428
+ as.data.frame() %> %
416
429
as_epi_archive(compactify = TRUE )
417
430
}
418
431
),
@@ -479,8 +492,14 @@ rlang::list2(
479
492
nwss <- readr :: read_csv(most_recent ) %> %
480
493
rename(value = state_med_conc ) %> %
481
494
arrange(geo_value , time_value )
482
- state_code <- readr :: read_csv(here :: here(" aux_data" , " flusion_data" , " state_codes_table.csv" ), show_col_types = FALSE )
483
- hhs_codes <- readr :: read_csv(here :: here(" aux_data" , " flusion_data" , " state_code_hhs_table.csv" ), show_col_types = FALSE )
495
+ state_code <- readr :: read_csv(
496
+ here :: here(" aux_data" , " flusion_data" , " state_codes_table.csv" ),
497
+ show_col_types = FALSE
498
+ )
499
+ hhs_codes <- readr :: read_csv(
500
+ here :: here(" aux_data" , " flusion_data" , " state_code_hhs_table.csv" ),
501
+ show_col_types = FALSE
502
+ )
484
503
state_to_hhs <- hhs_codes %> %
485
504
left_join(state_code , by = " state_code" ) %> %
486
505
select(hhs_region = hhs , geo_value = state_id )
@@ -489,8 +508,7 @@ rlang::list2(
489
508
drop_na() %> %
490
509
select(- agg_level , - year , - agg_level , - population , - density )
491
510
pop_data <- gen_pop_and_density_data()
492
- nwss_hhs_region <-
493
- nwss %> %
511
+ nwss_hhs_region <- nwss %> %
494
512
left_join(state_to_hhs , by = " geo_value" ) %> %
495
513
mutate(year = year(time_value )) %> %
496
514
left_join(pop_data , by = join_by(geo_value , year )) %> %
@@ -517,8 +535,12 @@ rlang::list2(
517
535
tar_target(
518
536
name = hhs_region ,
519
537
command = {
520
- hhs_region <- readr :: read_csv(" https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_code_hhs_table.csv" )
521
- state_id <- readr :: read_csv(" https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_codes_table.csv" )
538
+ hhs_region <- readr :: read_csv(
539
+ " https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_code_hhs_table.csv"
540
+ )
541
+ state_id <- readr :: read_csv(
542
+ " https://raw.githubusercontent.com/cmu-delphi/covidcast-indicators/refs/heads/main/_delphi_utils_python/delphi_utils/data/2020/state_codes_table.csv"
543
+ )
522
544
hhs_region %> %
523
545
left_join(state_id , by = " state_code" ) %> %
524
546
select(hhs_region = hhs , geo_value = state_id ) %> %
@@ -534,22 +556,22 @@ rlang::list2(
534
556
rename(" hhs" : = value ) %> %
535
557
add_hhs_region_sum(hhs_region ) %> %
536
558
filter(geo_value != " us" ) %> %
537
- as_epi_archive(
538
- compactify = TRUE
539
- )
559
+ # Always convert to data.frame after dplyr operations on data.table
560
+ # https://github.com/cmu-delphi/epiprocess/issues/618
561
+ as.data.frame() %> %
562
+ as_epi_archive(compactify = TRUE )
540
563
joined_archive_data $ geo_type <- " custom"
541
564
# drop aggregated geo_values
542
- joined_archive_data <- joined_archive_data %> %
543
- epix_merge(nwss_coarse , sync = " locf" )
544
- joined_archive_data $ geo_type <- " custom"
545
- # TODO: Maybe bring these back
546
- # epix_merge(doctor_visits_weekly_archive, sync = "locf") %>%
547
- joined_archive_data %<> %
548
- epix_merge(nssp_archive , sync = " locf" )
565
+ joined_archive_data <- joined_archive_data %> % epix_merge(nwss_coarse , sync = " locf" )
566
+ joined_archive_data %<> % epix_merge(nssp_archive , sync = " locf" )
549
567
joined_archive_data $ geo_type <- " custom"
550
- joined_archive_data %<> %
551
- epix_merge(google_symptoms_archive , sync = " locf" )
552
- joined_archive_data $ DT %<> % filter(grepl(" [a-z]{2}" , geo_value ), ! (geo_value %in% c(" as" , " pr" , " vi" , " gu" , " mp" )))
568
+ joined_archive_data %<> % epix_merge(google_symptoms_archive , sync = " locf" )
569
+ joined_archive_data <- joined_archive_data $ DT %> %
570
+ filter(grepl(" [a-z]{2}" , geo_value ), ! (geo_value %in% c(" as" , " pr" , " vi" , " gu" , " mp" ))) %> %
571
+ # Always convert to data.frame after dplyr operations on data.table
572
+ # https://github.com/cmu-delphi/epiprocess/issues/618
573
+ as.data.frame() %> %
574
+ as_epi_archive(compactify = TRUE )
553
575
joined_archive_data $ geo_type <- " state"
554
576
slide_forecaster(
555
577
epi_archive = joined_archive_data ,
@@ -591,7 +613,7 @@ rlang::list2(
591
613
rename(model = forecaster ) %> %
592
614
rename(prediction = value ) %> %
593
615
filter(! is.na(geo_value ))
594
- evaluate_predictions(predictions_cards = filtered_forecasts , truth_data = actual_eval_data ) %> %
616
+ evaluate_predictions(forecasts = filtered_forecasts , truth_data = actual_eval_data ) %> %
595
617
rename(forecaster = model )
596
618
}
597
619
),
0 commit comments