Skip to content

Commit

Permalink
Make datetime input more flexible for az_hourly() (#62)
Browse files Browse the repository at this point in the history
* update parse_params

* update tests

* update documentation

* update NEWS

* lubridate::

* change mock dir name

* fix timezone to MST

* clarify timezone

* print messages when defaults are used or when date is rounded up to 23:59:59

* Update NEWS.md

Co-authored-by: Jeremy Weiss <[email protected]>

* Update R/parse_params.R

Co-authored-by: Jeremy Weiss <[email protected]>

* Update R/az_hourly.R

Co-authored-by: Jeremy Weiss <[email protected]>

* update documentation

* add warning for requesting data more recent than is on API

* update mocks

* update to include new columns in az_heat()

* update test

* refactor to get warnings working

* re-work parse_params and messages

* simplify redactor

* explicitly specify method to get httptest2 working better

* simplify mocking

* create tests for warnings and messages

* modify messages and warnings

* only get single hour with start and end null not full day

* add/move "returning" messages

* add messages to parse_params

* update NEWS

* re-order conditionals for easier reading

* revert check for earlier than now

* fix tests

* update API mocks

* clean up tests

* remove old mocks

* update documentation

* update tests

* finish tidying up tests and mocking

* Update R/az_daily.R

Co-authored-by: Jeremy Weiss <[email protected]>

* Update R/parse_params.R

Co-authored-by: Jeremy Weiss <[email protected]>

* Update R/az_hourly.R

Co-authored-by: Jeremy Weiss <[email protected]>

* Update R/az_daily.R

Co-authored-by: Jeremy Weiss <[email protected]>

* Update R/az_daily.R

Co-authored-by: Jeremy Weiss <[email protected]>

* regenerate mocks

* hourly now only returns a single hour if no datetimes specified

* update tests

* update news

* relax test for current hour

---------

Co-authored-by: Jeremy Weiss <[email protected]>
  • Loading branch information
Aariq and jeremylweiss committed Dec 11, 2023
1 parent f22fdf8 commit 5a08de0
Show file tree
Hide file tree
Showing 55 changed files with 4,774 additions and 34,403 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ BugReports: https://github.com/uace-azmet/azmetr/issues
Suggests:
covr,
ggplot2,
glue,
httptest2,
knitr,
rmarkdown,
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# azmetr (development version)

- `az_hourly()` now accepts dates for `start_date_time` and `end_date_time`.
- Improved messages regarding what date/time ranges are being queried and returned by data retrieval functions.
- `az_hourly()` now returns data from the previous hour when `start_date_time` and `end_date_time` are not supplied rather than returning the previous day of hourly data.
- `azmetr` is now much more verbose, printing messages about which data are requested and which data are returned.


# azmetr 0.2.1

- `az_daily()` and `az_hourly()` now print a warning if there is any missing data for the combination of dates and stations requested
Expand Down
13 changes: 4 additions & 9 deletions R/az_add_units.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ az_add_units <- function(x) {
"heat_units_45F_sum",
"heat_units_9455F",
"heat_units_50F_sum",
"heat_units_55F_sum"
"heat_units_55F_sum",
"heat_units_9455F_sum"
)), ~units::set_units(., "degF")
)) %>%
dplyr::mutate(dplyr::across(dplyr::any_of(c(
Expand All @@ -87,14 +88,8 @@ az_add_units <- function(x) {
dplyr::ends_with("_in_sum")
), ~units::set_units(., "in")
)) %>%
dplyr::mutate(dplyr::across(dplyr::any_of(c(
"chill_hours_0C",
"chill_hours_20C",
"chill_hours_32F",
"chill_hours_45F",
"chill_hours_68F",
"chill_hours_7C"
)), ~units::set_units(., "hours")
dplyr::mutate(dplyr::across(
dplyr::starts_with("chill_hours"), ~units::set_units(., "hours")
)) %>%
dplyr::mutate(dplyr::across(dplyr::starts_with("vp_"),
~units::set_units(., "kPa")
Expand Down
29 changes: 23 additions & 6 deletions R/az_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,32 @@ az_daily <- function(station_id = NULL, start_date = NULL, end_date = NULL) {
parse_params(station_id = station_id, start = start_date, end = end_date)

# Query API --------------------------------------------
if (is.null(start_date) & is.null(end_date)) {
message("Querying data from ", params$start)
} else {
message("Querying data from ", params$start, " through ", params$end)
}

if (length(station_id) <= 1) {
out <-
retrieve_data(params$station_id,
params$start,
params$start_f,
params$time_interval,
endpoint = "daily")
} else if (length(station_id) > 1) {
out <-
purrr::map_df(
params$station_id,
function(x) {
retrieve_data(x, params$start, params$time_interval, endpoint = "daily")
retrieve_data(x,
params$start_f,
params$time_interval,
endpoint = "daily")
}
)
}

if(nrow(out) == 0) {
if (nrow(out) == 0) {
warning("No data retrieved from API")
#return 0x0 tibble for type consistency
return(tibble::tibble())
Expand All @@ -81,7 +90,9 @@ az_daily <- function(station_id = NULL, start_date = NULL, end_date = NULL) {
n_obs <- out %>%
dplyr::summarise(n = dplyr::n(), .by = dplyr::all_of("meta_station_id")) %>%
dplyr::filter(.data$n < as.numeric(lubridate::period(params$time_interval), "day") + 1)
if(nrow(n_obs) != 0) {
if (nrow(n_obs) != 0 |
# Also warn if the missing data is just at the end
lubridate::ymd(max(out$datetime), tz = "America/Phoenix") < params$end) {
warning("Some requested data were unavailable")
}

Expand All @@ -98,7 +109,7 @@ az_daily <- function(station_id = NULL, start_date = NULL, end_date = NULL) {
#convert NAs
dplyr::mutate(
dplyr::across(
tidyselect::where(is.numeric),
tidyselect::where(is.numeric),
function(x)
dplyr::if_else(x %in% c(-999, -9999, -99999, -7999, 999, 999.9, 9999), NA_real_, x)
)
Expand All @@ -114,7 +125,13 @@ az_daily <- function(station_id = NULL, start_date = NULL, end_date = NULL) {
wind_2min_timestamp = lubridate::with_tz(
lubridate::parse_date_time(.data$wind_2min_timestamp, orders = "ymdHMSz"),
tzone = "America/Phoenix"
)
)
)

if (length(unique(out$datetime)) == 1) {
message("Returning data from ", unique(out$datetime))
} else {
message("Returning data from ", min(out$datetime), " through ", max(out$datetime))
}
return(out)
}
32 changes: 26 additions & 6 deletions R/az_heat.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
#' precision is supplied.
#' @param end_date A length 1 vector of class Date, POSIXct, or character in
#' YYYY-MM-DD format. Will be rounded **down** to the nearest day if more
#' precision is supplied. Defaults to the current date if left blank.
#' precision is supplied. Defaults to the current date if left blank. If only
#' an `end_date` is supplied, then data will be cumulative from the start of
#' the year of `end_date`.
#' @details Unlike [az_daily()], only one row of data per station is returned,
#' regardless of `start_date` and `end_date`. However, the data returned is
#' cumulative over the time period specified by `start_date` and `end_date`.
Expand Down Expand Up @@ -52,23 +54,36 @@ az_heat <- function(station_id = NULL, start_date = NULL, end_date = NULL) {

#TODO: document output columns or link to API docs if appropriate
#TODO: check for valid station IDs
#TODO: allow end_date to be specified without start_date
check_internet()

# If no start date supplied, default is Jan 1 of current year.
if (is.null(start_date)) {
if(is.null(end_date)) {
start_date <- lubridate::floor_date(lubridate::today(), "year")
} else {
start_date <- lubridate::floor_date(lubridate::ymd(end_date), "year")
}
}
params <- parse_params(station_id, start = start_date, end = end_date)

# Query API --------------------------------------------

message("Querying data from ", format(params$start, "%Y-%m-%d"),
" through ", format(params$end, "%Y-%m-%d"))

if (length(station_id) <= 1) {
out <-
retrieve_data(params$station_id,
params$start,
params$start_f,
params$time_interval,
endpoint = "hueto")
} else if (length(station_id) > 1) {
out <- purrr::map_df(
params$station_id,
function(x) {
retrieve_data(x, params$start, params$time_interval, endpoint = "hueto")
retrieve_data(x,
params$start_f,
params$time_interval,
endpoint = "hueto")
}
)
}
Expand All @@ -79,7 +94,7 @@ az_heat <- function(station_id = NULL, start_date = NULL, end_date = NULL) {
return(tibble::tibble())
}

# Wrangle output ----------------------------------------------------------
# Wrangle output ----------------------------------------------------------
out <- out %>%
#move metadata to beginning
dplyr::select(dplyr::starts_with("meta_"), dplyr::everything()) %>%
Expand All @@ -96,5 +111,10 @@ az_heat <- function(station_id = NULL, start_date = NULL, end_date = NULL) {
function(x)
dplyr::if_else(x %in% c(-999, -9999, -99999, -7999, 999, 999.9, 9999), NA_real_, x))
)

# Since output from API doesn't contain any information about dates, this is just an assumption
message("Returning data from ", format(params$start, "%Y-%m-%d"),
" through ", format(params$end, "%Y-%m-%d"))

return(out)
}
56 changes: 49 additions & 7 deletions R/az_hourly.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@
#' digits (e.g. `station_id = c("az08", "az37")`) If left blank data for all
#' stations will be returned
#' @param start_date_time A length 1 vector of class POSIXct or character in
#' YYYY-MM-DD HH format. Will be rounded **down** to the nearest hour if more
#' precision is supplied.
#' YYYY-MM-DD HH format, in AZ time. Will be rounded **down** to the nearest
#' hour if more precision is supplied. If only a date (YYYY-MM-DD) is
#' supplied, data will be requested starting at 01:00:00 of that day.
#' @param end_date_time A length 1 vector of class POSIXct or character in
#' YYYY-MM-DD HH format. Will be rounded **down** to the nearest hour if more
#' precision is supplied. Defaults to the current time if left blank.
#' YYYY-MM-DD HH format, in AZ time. Will be rounded **down** to the nearest
#' hour if more precision is supplied. If only a date (YYYY-MM-DD) is
#' supplied, data will be requested through the *end* of that day (23:59:59).
#' Defaults to the current date and time if left blank and `start_date_time`
#' is specified.
#' @details If neither `start_date_time` nor `end_date_time` are supplied, the
#' most recent day of data will be returned. If only `start_date_time` is
#' most recent hour of data will be returned. If only `start_date_time` is
#' supplied, then `end_date_time` defaults to the current time. Supplying
#' only `end_date_time` will result in an error.
#' @note If `station_id` is supplied as a vector, multiple successive calls to
Expand Down Expand Up @@ -55,35 +59,66 @@ az_hourly <- function(station_id = NULL, start_date_time = NULL, end_date_time =
end = end_date_time, hour = TRUE)

# Query API --------------------------------------------
if (is.null(start_date_time) & is.null(end_date_time)) {
message("Querying most recent hour of data")
} else {
message("Querying data from ", format(params$start, "%Y-%m-%d %H:%M"),
" through ", format(params$end, "%Y-%m-%d %H:%M"))
}

if (length(station_id) <= 1) {
out <-
retrieve_data(params$station_id,
params$start,
params$start_f,
params$time_interval,
endpoint = "hourly")
} else if (length(station_id) > 1) {
out <-
purrr::map_df(
params$station_id,
function(x) {
retrieve_data(x, params$start, params$time_interval, endpoint = "hourly")
retrieve_data(x,
params$start_f,
params$time_interval,
endpoint = "hourly")
}
)
}

# If most recent hour is queried, make sure only one hour is returned
if (is.null(start_date_time) & is.null(end_date_time)) {
out <-
out |>
dplyr::filter(.data$date_datetime == max(.data$date_datetime), .by = "meta_station_id")
}

if(nrow(out) == 0) {
warning("No data retrieved from API")
#return 0x0 tibble
return(tibble::tibble())
}

#Check if any data is missing
#Note, this always "passes" when both start and end are NULL (because period("*") is NA)
n_obs <- out %>%
dplyr::summarise(n = dplyr::n(), .by = dplyr::all_of("meta_station_id")) %>%
dplyr::filter(.data$n < as.numeric(lubridate::period(params$time_interval), "hour"))
if(nrow(n_obs) != 0) {
warning("Some requested data were unavailable")
}

#Warn if the missing data is just at the end
if (lubridate::ymd_hms(max(out$date_datetime), tz = "America/Phoenix") < params$end) {
warning(
"You requested data through ",
params$end,
" but only data through ",
max(out$date_datetime),
" were available"
)
}


# Wrangle output ----------------------------------------------------------
out <- out %>%
#move metadata to beginning
Expand Down Expand Up @@ -127,5 +162,12 @@ az_hourly <- function(station_id = NULL, start_date_time = NULL, end_date_time =
tzone = "America/Phoenix"
)
)

if (length(unique(out$date_datetime)) == 1) {
message("Returning data from ", format(unique(out$date_datetime), "%Y-%m-%d %H:%M"))
} else {
message("Returning data from ", format(min(out$date_datetime), "%Y-%m-%d %H:%M"),
" through ", format(max(out$date_datetime), "%Y-%m-%d %H:%M"))
}
return(out)
}
Loading

0 comments on commit 5a08de0

Please sign in to comment.