diff --git a/DESCRIPTION b/DESCRIPTION index a712865..6ee6db2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: hbGIS Title: Process GIS Data for Human Behaviour Research Description: Processes GIS data merge with hbGPS pre-processed data of wearable GPS and accelerometation sensors. Version: 0.0.1 -Date: 2023-11-09 +Date: 2023-12-20 Authors@R: c(person(given = "Vincent", family = "van Hees", diff --git a/NAMESPACE b/NAMESPACE index 78f74cf..2e381d1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,9 @@ # Generated by roxygen2: do not edit by hand export(build_days) -export(build_hbGIS) export(build_multimodal) export(build_trajectories) +export(build_whenwhatwhere) export(check_and_clean_palms_data) export(check_missing_id) export(hbGIS) diff --git a/R/build_days.R b/R/build_days.R index 29c96a5..2c95c77 100644 --- a/R/build_days.R +++ b/R/build_days.R @@ -1,25 +1,16 @@ -#' Calculate day-level summaries from the palmsplus dataset +#' Calculate day-level summaries from the whenwhat dataset #' -#' @description Build a days dataset by summarising \code{palmsplus} -#' by day and person (\code{identifier}). Not all variables in \code{palmsplus} -#' are summarised, only those specified using \code{\link{palms_add_field}} with -#' the argument \code{domain_field = TRUE}. By default, a \code{duration} field -#' is added (e.g., the total minutes per day). +#' @description Build a days dataset by summarising \code{whenwhat} +#' by day and person (\code{identifier}). #' -#' All data are summarised by default. However, additional aggragation \emph{domains} -#' can be specified using \code{\link{palms_add_domain}} before building days. -#' Domains are a subset of data, such as during school time. All \code{domain_field} -#' variables will be summarised for each \emph{domain} seperatly. -#' -#' @param data The palmsplus data obtained from \code{\link{palms_build_palmsplus}}. +#' @param data The whenwhat data obtained from \code{\link{build_whenwhatwhere}}. 
#' @param verbose Print progress to console. Default is \code{TRUE}. -#' @param palmsplus_domains ... -#' @param palmsplus_fields ... +#' @param where_field ... +#' @param whenwhat_field ... #' @param loca Nested list with location information #' @param participant_basis participant_basis -#' -#' +#'#' #' @return A table summarised by day. #' #' @import dplyr @@ -30,8 +21,8 @@ #' @export # Code modified from https://thets.github.io/palmsplusr/ build_days <- function(data = NULL, verbose = TRUE, - palmsplus_domains = NULL, - palmsplus_fields = NULL, + where_field = NULL, + whenwhat_field = NULL, loca = NULL, participant_basis = NULL) { # Note: @@ -47,38 +38,37 @@ build_days <- function(data = NULL, verbose = TRUE, } } - duration = datetime = name = domain_field = NULL - - domain_fields <- palmsplus_domains %>% filter(domain_field == TRUE) - domain_names <- domain_fields %>% pull(name) + duration = datetime = name = is_where_field = NULL + where_field <- where_field %>% filter(is_where_field == TRUE) + where_names <- where_field %>% pull(name) - if (is.null(domain_names)) { - domain_names <- "total" + if (is.null(where_names)) { + where_names <- "total" } else { - domain_names <- c("total", domain_names) + where_names <- c("total", where_names) } - domain_args <- setNames("1", "total") %>% lapply(parse_expr) - domain_args <- c(domain_args, setNames(domain_fields[[2]], domain_fields[[1]]) %>% + where_args <- setNames("1", "total") %>% lapply(parse_expr) + where_args <- c(where_args, setNames(where_field[[2]], where_field[[1]]) %>% lapply(parse_expr)) data <- data %>% - mutate(!!! domain_args) %>% + mutate(!!! 
where_args) %>% mutate_if(is.logical, as.integer) - fields <- palmsplus_fields %>% filter(domain_field == TRUE) %>% pull(name) + fields <- whenwhat_field %>% filter(is_where_field == TRUE) %>% pull(name) data <- data %>% st_set_geometry(NULL) %>% - dplyr::select(identifier, datetime, any_of(domain_names), all_of(fields)) %>% + dplyr::select(identifier, datetime, any_of(where_names), all_of(fields)) %>% mutate(duration = 1) %>% mutate_at(vars(-identifier,-datetime), ~ . * palms_epoch(data) / 60) %>% group_by(identifier, date = as.Date(datetime)) %>% dplyr::select(-datetime) x <- list() - for (i in domain_names) { + for (i in where_names) { x[[i]] <- data %>% filter(!!(as.name(i)) > 0) %>% - dplyr::select(-any_of(domain_names), duration) %>% + dplyr::select(-any_of(where_names), duration) %>% summarise_all(~ sum(.)) %>% ungroup() %>% rename_at(vars(-identifier, -date), ~ paste0(i, "_", .)) @@ -87,13 +77,13 @@ build_days <- function(data = NULL, verbose = TRUE, result <- x %>% reduce(left_join, by = c("identifier" = "identifier", "date" = "date")) - # Count the number of segments per domain per day per identifier + # Count the number of segments per where per day per identifier segmentcount = function(x) { x = as.numeric(unlist(x)) # x is a tibble column, so first convert to numeric vector return(length(which(rle(x)$values != 0))) } - for (dom in domain_names) { + for (dom in where_names) { if (dom != "total") { result[, dom] <- NA for (id in unique(result$identifier)) { diff --git a/R/build_multimodal.R b/R/build_multimodal.R index cfd6df8..520a68a 100644 --- a/R/build_multimodal.R +++ b/R/build_multimodal.R @@ -6,7 +6,7 @@ #' @param data The trajectories object built with \code{palms_calc_trajectories}. 
#' @param spatial_threshold Spatial threshold in meters #' @param temporal_threshold Temporal threshold in minutes -#' @param palmsplus The dataset build by \code{build_hbGIS} +#' @param whenwhat The dataset build by \code{build_hbGIS} #' @param verbose Print progress after each step. Default is \code{TRUE}. #' @param multimodal_fields ... #' @param trajectory_locations ... @@ -40,7 +40,7 @@ build_multimodal <- function(data = NULL, spatial_threshold, temporal_threshold, - palmsplus = NULL, + whenwhat = NULL, verbose = TRUE, multimodal_fields = NULL, trajectory_locations = NULL) { @@ -127,8 +127,8 @@ build_multimodal <- function(data = NULL, trajectory_locations$end_criteria)) - # Rather than recalculating geometry, just lookup in palmsplus - lookup <- palmsplus %>% + # Rather than recalculating geometry, just lookup in whenwhat + lookup <- whenwhat %>% filter(tripnumber > 0 & triptype %in% c(1, 4)) %>% as.data.frame() %>% dplyr::select(all_of(c("identifier", "tripnumber", "triptype", names))) diff --git a/R/build_hbGIS.R b/R/build_whenwhatwhere.R similarity index 74% rename from R/build_hbGIS.R rename to R/build_whenwhatwhere.R index 21a4794..9fdd459 100644 --- a/R/build_hbGIS.R +++ b/R/build_whenwhatwhere.R @@ -1,12 +1,11 @@ - -#' Build the hbGIS dataset +#' build_whenwhatwhere #' -#' @description Build the \code{hbGIS} dataset by adding additional columns to the hbGPS output data. -#' The additional columns are specified using \code{\link{palms_add_field}}. +#' @description Build the whenwhat dataset by adding additional columns to the +#' hbGPS output data based on whenwhat_field. #' #' @param data The hbGPS data obtained using \code{read_palms} from palmplusr. #' @param verbose Print progress to console after each iteration. Default is \code{TRUE}. 
-#' @param palmsplus_fields palmsplus_fields defined in hbGIS +#' @param whenwhat_field whenwhat_field defined in hbGIS #' @param loca Nested list with location information #' @param participant_basis participant_basis #' @@ -21,13 +20,13 @@ #' @export #' # Code modified from https://thets.github.io/palmsplusr/ -build_hbGIS <- function(data = NULL, verbose = TRUE, palmsplus_fields = NULL, +build_whenwhatwhere <- function(data = NULL, verbose = TRUE, whenwhat_field = NULL, loca = NULL, participant_basis = NULL) { # Note: # home, school, home_nbh, school_nbh (or similar) need to be present, # because the functions that are passed on assume that they exist - # So, now we need to create those objects from object loca + # So, create those objects from object loca identifier = NULL Nlocations = length(loca) for (i in 1:Nlocations) { @@ -37,8 +36,7 @@ build_hbGIS <- function(data = NULL, verbose = TRUE, palmsplus_fields = NULL, } } - - field_args <- setNames(palmsplus_fields$formula, palmsplus_fields$name) %>% + field_args <- setNames(whenwhat_field$formula, whenwhat_field$name) %>% lapply(parse_expr) x <- list() diff --git a/R/check_and_clean_palms_data.R b/R/check_and_clean_palms_data.R index b258b1b..eb2ef99 100644 --- a/R/check_and_clean_palms_data.R +++ b/R/check_and_clean_palms_data.R @@ -41,8 +41,9 @@ check_and_clean_palms_data <- function(palms_to_clean, country_name, outputdir = # Saving the new 'clean' dataset - %>% --------------------------------------- # write_csv(palms, str_replace(link_to_csv, pattern = '.csv', '_cleaned.csv'), na = "") - data.table::fwrite(error_list, paste(outputdir, country_name,"error_list.csv", sep = "_")) - + if (nrow(error_list) > 0) { + data.table::fwrite(error_list, paste(outputdir, country_name,"error_list.csv", sep = "_")) + } return(palms_to_clean_lower) } diff --git a/R/hbGIS.R b/R/hbGIS.R index d3d007e..4d72be9 100644 --- a/R/hbGIS.R +++ b/R/hbGIS.R @@ -1,4 +1,4 @@ -#' palmsplusr_shiny +#' hbGIS #' #' @param gisdir Path to 
directory with GIS files #' @param palmsdir Path to hbGPS output directory @@ -7,6 +7,11 @@ #' @param dataset_name Name of dataset #' @param configfile Configuration file #' @param verbose verbose Boolean +#' @param baselocation character, to specify reference location for individuals, e.g. home +#' @param groupinglocation character, to specify reference location location for groups, e.g. school +#' @param write_shp boolean, to indicate whether shape file should be written +#' @param split_GIS boolean, to indicate whether sublocation inside GIS files are to be split +#' @param sublocationID character, GIS column name to be used as identifier for sublocations #' @return palms_to_clean_lower object #' @importFrom stats end start formula as.formula #' @importFrom tidyr pivot_wider @@ -22,18 +27,68 @@ hbGIS <- function(gisdir = "", gislinkfile = "", outputdir = "", dataset_name = "", - configfile = "", - verbose = TRUE) { - # Hard code arguments that may need to become function arguments to give control to user - groupinglocation = "school" - writeshp = FALSE # whether to wrist a shape file - splitGIS = TRUE # whether to split sublocations (TRUE) or union them - sublocid = "OBJECTID" # column name in GIS file to identify sublocation id - + configfile = NULL, + verbose = TRUE, + baselocation = NULL, + groupinglocation = NULL, + write_shp = NULL, + split_GIS = NULL, + sublocationID = NULL) { + #=============================================== + # Load configuration and define field tables + #=============================================== + if (length(configfile) > 0) { + # check for missing parameters, such that hbGIS can fall back on defaults + # here the config_pamsplusr file inside the package is assumed to hold all the defaults. 
+ config_def = system.file("testfiles_hbGIS/config_hbGIS.csv", package = "hbGIS")[1] + + params_def = load_params(file = config_def) + params_def$id = rownames(params_def) + params = load_params(file = configfile) + params$id = rownames(params) + missingPar = which(params_def$id %in% params$id == FALSE) + if (length(missingPar) > 0) { + # update the configfile as provide by the user + params = rbind(params, params_def[missingPar,]) + params = params[, -which(colnames(params) == "id")] + update_params(new_params = params, file = configfile) + } + rm(params_def) + config <- configfile + } else { + # If no configfile is provided fall back on default + config <- system.file("testfiles_hbGIS/config_hbGIS.csv", package = "hbGIS")[1] + } + # adding fields + CONF = read.csv(config, sep = ",") + CONF$start_criteria = "" + CONF$end_criteria = "" + + # Extract general parameters from config file if not provided as input arguments + if (is.null(groupinglocation)) { + groupinglocation = CONF$formula[which(CONF$name == "groupinglocation")] + } + if (is.null(baselocation)) { + baselocation = CONF$formula[which(CONF$name == "baselocation")] + } + if (is.null(write_shp)) { + write_shp = CONF$formula[which(CONF$name == "write_shp")] # whether to wrist a shape file + } + if (is.null(split_GIS)) { + split_GIS = CONF$formula[which(CONF$name == "split_GIS")] # whether to split sublocations (TRUE) or union them + } + if (is.null(sublocationID)) { + sublocationID = CONF$formula[which(CONF$name == "sublocationID")] # column name in GIS file to identify sublocation id + } + CONF = CONF[-which(CONF$context == "general"),] + #------------------------------------------------------------ lon = identifier = palms = NULL # . 
= was also included, but probably wrong + + + #=============================================== # GIS files #=============================================== @@ -100,19 +155,18 @@ hbGIS <- function(gisdir = "", # Check whether there are multiple polygons in the shapefile: nshp = nrow(shp_dat) loca[[jj]][[2]] = shp_dat # look at all sublocations combined either way - if (publiclocation == TRUE) { + if (publiclocation == TRUE & verbose == TRUE) { cat(paste0("\n", basename(as.character(unlist(loca[[jj]][4]))), " => ", paste0(names(shp_dat), collapse = ", "), " (", nshp, " geoms)")) } - if (nshp > 1 & splitGIS == TRUE & sublocid %in% names(shp_dat) & publiclocation) { + if (nshp > 1 & split_GIS == TRUE & sublocationID %in% names(shp_dat) & publiclocation) { collect_na = NULL # Treat each polygon as a separate location for (gi in 1:nshp) { fn_4 = as.character(unlist(loca[[jj]][4])) gi2 = Nlocations + gi loca[[gi2]] = vector("list", 4) - - objectname = as.character(st_drop_geometry(shp_dat[gi, sublocid])) + objectname = as.character(sf::st_drop_geometry(shp_dat[gi, sublocationID])) if (objectname == "NA") collect_na = c(collect_na, gi) loca[[gi2]][[2]] = shp_dat[gi, ] loca[[gi2]][[4]] = fn_4 @@ -150,55 +204,22 @@ hbGIS <- function(gisdir = "", #=============================================== # hbGPS output (PALMS output) #=============================================== - palmsplus_folder = paste0(outputdir, "/hbGIS_output") - if (!dir.exists(palmsplus_folder)) { + outputFolder = paste0(outputdir, "/hbGIS_output") + if (!dir.exists(outputFolder)) { if (verbose) cat("\nCreating hbGIS output directory\n") - dir.create(palmsplus_folder) + dir.create(outputFolder) } sf::sf_use_s2(FALSE) # identify palms csv output files in palmsdir: - palms_country_files <- list.files(path = palmsdir, pattern = "*.csv", full.names = TRUE) - # skip the combined file that palms generates - palms_country_files = grep(pattern = "combined.csv", x = palms_country_files, invert = TRUE, value = TRUE) + 
if (!is.null(palmsdir)) { + palms_country_files <- list.files(path = palmsdir, pattern = "*.csv", full.names = TRUE) + # skip the combined file that palms generates + palms_country_files = grep(pattern = "combined.csv", x = palms_country_files, invert = TRUE, value = TRUE) + } else { + palms_country_files = NULL + } if (length(palms_country_files) == 0) { - # Simulate hbGPS output (only for code developement purposes) - Nmin = 500 - now = as.POSIXct("2023-11-30 10:00:00 CET") - dateTime = seq(now, now + ((Nmin - 1) * 60), by = 60) - example_object = loca[[1]][[2]][1,] - point_in_object = st_sample(x = example_object, size = 1) - xy = sf::st_coordinates(x = point_in_object) - - # latitude is for most of the time 1 lat degree away from location - # but for 30 minutes inside the location surround by 5 minute trips before and after - trip = seq(xy[1] - 1, xy[1] - 0.2, by = 0.2) - away = rep(xy[1] - 1, (Nmin/2) - 20) - lon = c(away, trip, rep(xy[1], 30), rev(trip), away) - lat = rep(xy[2], Nmin) # lon stays the same the entire time - tripNumber = c(rep(0, length(away)), rep(1, 5), rep(0, 30), rep(2, 5), rep(0, length(away))) - sedentaryBoutNumber = c(rep(1, length(away)), rep(0, 5), rep(2, 30), rep(0, 5), rep(3, length(away))) - tripType = rep(0, Nmin) - tripMOT = rep(0, Nmin) - tripType[which(diff(tripNumber) > 0) + 1] = 1 - tripType[which(diff(tripNumber) < 0)] = 4 - tripMOT[which(tripNumber != 0)] = 3 - hbGPSout = data.frame(identifier = "sim1", - dateTime = dateTime, - dow = rep(5, Nmin), - lat = lat, - lon = lon, - fixTypeCode = rep(-1, Nmin), - iov = rep(2, Nmin), # all the time outdoor (indoor, outdoor, vehicle) - tripNumber = tripNumber, - tripType = tripType, - tripMOT = tripMOT, - activity = rep(0, Nmin), - activityIntensity = rep(0, Nmin), - activityBoutNumber = rep(0, Nmin), - sedentaryBoutNumber = sedentaryBoutNumber) - if (!dir.exists(palmsdir)) dir.create(palmsdir, recursive = TRUE) - palms_country_files = paste0(palmsdir, "/combined.csv") - 
write.csv(hbGPSout, file = palms_country_files, row.names = FALSE) + stop("\nno data found") } # read and combine palms csv output files @@ -221,80 +242,34 @@ hbGIS <- function(gisdir = "", PALMS_reduced$dateTime = as.POSIXct(PALMS_reduced$dateTime, format = "%d/%m/%Y %H:%M:%S", tz = "") # Write to csv and read using read_palms to format the object as expected from the rest of the code - PALMS_reduced_file = normalizePath(paste0(palmsplus_folder, "/", stringr::str_interp("PALMS_${dataset_name}_reduced.csv"))) + # if (substring(text = outputFolder, first = 1, last = 1) == ".") { + # print("convert") + # print(outputFolder) + # outputFolder = gsub(pattern = "[.]", replacement = getwd(), x = outputFolder) + # print(outputFolder) + # } + PALMS_reduced_file = suppressWarnings(normalizePath(paste0(outputFolder, "/", stringr::str_interp("PALMS_${dataset_name}_reduced.csv")))) # if (verbose) cat(paste0("\nCheck PALMS_reduced_file: ", PALMS_reduced_file)) write.csv(palms_reduced_cleaned, PALMS_reduced_file, row.names = FALSE) palms = palmsplusr::read_palms(PALMS_reduced_file, verbose = FALSE) palms$datetime = as.POSIXct(palms$datetime, format = "%d/%m/%Y %H:%M:%S", tz = "") - #=============================================== - # Load linkage file and identify which PALMS ids and home/school - # ids are missing, but allow for publiclocations that are not linked - # to an ID - #=============================================== - if (length(gislinkfile) > 0) { - participant_basis = read_csv(gislinkfile, show_col_types = FALSE) - - - # Check for missing IDs ------------------------------------------------------------------------- - withoutMissingId = check_missing_id(participant_basis, palmsplus_folder, dataset_name, palms, - loca, groupinglocation = groupinglocation, - verbose = verbose) - palms = withoutMissingId$palms - participant_basis = withoutMissingId$participant_basis - loca = withoutMissingId$loca - write.csv(participant_basis, paste0(palmsplus_folder, "/", 
stringr::str_interp("participant_basis_${dataset_name}.csv"))) # store file for logging purposes only - if (length(participant_basis) == 0 || nrow(participant_basis) == 0) { - stop("\nParticipant basis file does not include references for the expected recording IDs") - } - } else { - participant_basis = "" - } - #=============================================== - # Load configuration and define field tables - #=============================================== - if (length(configfile) > 0) { - # check for missing parameters, such that hbGIS can fall back on defaults - # here the config_pamsplusr file inside the package is assumed to hold all the defaults. - config_def = system.file("testfiles_hbGIS/config_hbGIS.csv", package = "hbGIS")[1] - params_def = load_params(file = config_def) - params_def$id = rownames(params_def) - params = load_params(file = configfile) - params$id = rownames(params) - missingPar = which(params_def$id %in% params$id == FALSE) - if (length(missingPar) > 0) { - # update the configfile as provide by the user - params = rbind(params, params_def[missingPar,]) - params = params[, -which(colnames(params) == "id")] - update_params(new_params = params, file = configfile) - } - rm(params_def) - config <- configfile - } else { - # If no configfile is provided fall back on default - config <- system.file("testfiles_hbGIS/config_hbGIS.csv", package = "hbGIS")[1] - } - # adding fields - CONF = read.csv(config, sep = ",") - CONF$start_criteria = "" - CONF$end_criteria = "" #===================================================== # Expand CONF with standard location based fields #===================================================== if (verbose) cat("\n<<< expand CONF...\n") - # palmsplus_domain: + # where_field: #------------------- - # ignore stored definition as we no longer use this - - CONF = CONF[which(CONF[,1] != "palmsplus_domain"), ] + # # ignore stored definition as we no longer use this + # CONF = CONF[which(CONF[,1] != "where_field"), ] element3 = 
ifelse(length(locationNames_table) > 0, yes = paste0("!", paste0("at_", locationNames_table, collapse = " & !"), " & "), no = "") - CONF[nrow(CONF) + 1, ] = c("palmsplus_domain", + CONF[nrow(CONF) + 1, ] = c("where_field", "transport", paste0(element3, "(pedestrian | bicycle | vehicle)"), TRUE, NA, "", "") - CONF[nrow(CONF) + 1, ] = c("palmsplus_domain", + CONF[nrow(CONF) + 1, ] = c("where_field", "other", paste0(element3, "(!pedestrian & !bicycle & !vehicle)", # removed because theorectically possible ifelse(test = length(locationNames_nbh) > 0, yes = " & ", no = ""), @@ -306,10 +281,10 @@ hbGIS <- function(gisdir = "", cnt = cnt + 1 for (i in 1:Nlocations) { - # palmsplus_domain: + # where_field: #------------------- if (locationNames[i] %in% locationNames_table) { - CONF[cnt, ] = c("palmsplus_domain", + CONF[cnt, ] = c("where_field", locationNames[i], paste0("at_", locationNames[i]), TRUE, @@ -317,7 +292,7 @@ hbGIS <- function(gisdir = "", } else if (locationNames[i] %in% locationNames_nbh) { # condition that only needs to be used if table element is present at_table = ifelse(test = locationNames[i] %in% locationNames_table == TRUE, yes = paste0("!at_", locationNames[i], " &"), no = "") - CONF[cnt, ] = c("palmsplus_domain", + CONF[cnt, ] = c("where_field", paste0(locationNames[i], "_nbh"), paste0(at_table, " at_", locationNames[i], "_nbh", " & (!vehicle)"), # removed !pedestrian & !bicycle & because unclear why these are not possible in a neighbourhood, e.g. 
park @@ -325,25 +300,25 @@ hbGIS <- function(gisdir = "", NA, "", "") } cnt = cnt + 1 - # palmsplus_field: + # whenwhat_field: #------------------- if (!is.null(loca[[i]][[1]])) { # only do this if there is table data (meaning that location is linked to participant basis file) if (locationNames[i] == "home") { - CONF[cnt, ] = c("palmsplus_field", + CONF[cnt, ] = c("whenwhat_field", paste0("at_", locationNames[i]), paste0("palms_in_polygon(datai, polygons = dplyr::filter(", locationNames[i],", identifier == i), identifier)"), NA, "", "", "") cnt = cnt + 1 - CONF[cnt, ] = c("palmsplus_field", + CONF[cnt, ] = c("whenwhat_field", paste0("at_", locationNames[i], "_nbh"), paste0("palms_in_polygon(datai, polygons = dplyr::filter(", locationNames[i], "_nbh, identifier == i), identifier)"), NA, "", "", "") cnt = cnt + 1 } else { - CONF[cnt, ] = c("palmsplus_field", + CONF[cnt, ] = c("whenwhat_field", paste0("at_", locationNames[i]), paste0("palms_in_polygon(datai, polygons = dplyr::filter(", locationNames[i],",", locationNames[i], @@ -351,7 +326,7 @@ hbGIS <- function(gisdir = "", locationNames[i], "_id)))"), NA, "", "", "") cnt = cnt + 1 - CONF[cnt, ] = c("palmsplus_field", + CONF[cnt, ] = c("whenwhat_field", paste0("at_", locationNames[i], "_nbh"), paste0("palms_in_polygon(datai, polygons = dplyr::filter(", locationNames[i], "_nbh,", locationNames[i], @@ -363,9 +338,9 @@ hbGIS <- function(gisdir = "", } else { # locations not in linkagefile # note that colSums will ensure that sublocation are combined - CONF[cnt, ] = c("palmsplus_field", + CONF[cnt, ] = c("whenwhat_field", paste0("at_", locationNames[i], "_nbh"), - paste0("suppressMessages(colSums(st_contains(", locationNames[i], "_nbh, datai, sparse = FALSE)))"), + paste0("suppressMessages(colSums(sf::st_contains(", locationNames[i], "_nbh, datai, sparse = FALSE)))"), NA, "", "", "") cnt = cnt + 1 } @@ -391,15 +366,15 @@ hbGIS <- function(gisdir = "", CONF = CONF[!duplicated(CONF),] } if (verbose) cat(">>>\n\n") - 
palmsplusr_field_rows = which(CONF$context == "palmsplus_field") - palmsplus_fields = tibble(name = CONF$name[palmsplusr_field_rows], - formula = CONF$formula[palmsplusr_field_rows], - domain_field = CONF$domain_field[palmsplusr_field_rows]) + whenwhat_field_rows = which(CONF$context == "whenwhat_field") + whenwhat_field = tibble(name = CONF$name[whenwhat_field_rows], + formula = CONF$formula[whenwhat_field_rows], + is_where_field = CONF$is_where_field[whenwhat_field_rows]) - palmsplusr_domain_rows = which(CONF$context == "palmsplus_domain") - palmsplus_domains = tibble(name = CONF$name[palmsplusr_domain_rows], - formula = CONF$formula[palmsplusr_domain_rows], - domain_field = CONF$domain_field[palmsplusr_domain_rows]) + where_field_rows = which(CONF$context == "where_field") + where_field = tibble(name = CONF$name[where_field_rows], + formula = CONF$formula[where_field_rows], + is_where_field = CONF$is_where_field[where_field_rows]) #============================= # trajectory_fields trajectory_field_rows = which(CONF$context == "trajectory_field") @@ -417,12 +392,37 @@ hbGIS <- function(gisdir = "", trajectory_locations = tibble(name = CONF$name[trajectory_location_rows], start_criteria = CONF$start_criteria[trajectory_location_rows], end_criteria = CONF$end_criteria[trajectory_location_rows]) + write.csv(x = CONF[order(CONF$context), ], file = paste0(outputFolder, "/formula_log.csv"), row.names = FALSE) + #=============================================== + # Load linkage file and identify which PALMS ids and home/school + # ids are missing, but allow for publiclocations that are not linked + # to an ID + #=============================================== + if (length(gislinkfile) > 0) { + participant_basis = read_csv(gislinkfile, show_col_types = FALSE) + + + # Check for missing IDs ------------------------------------------------------------------------- + withoutMissingId = check_missing_id(participant_basis, outputFolder, dataset_name, palms, + loca, 
groupinglocation = groupinglocation, + verbose = verbose) + palms = withoutMissingId$palms + participant_basis = withoutMissingId$participant_basis + loca = withoutMissingId$loca + write.csv(participant_basis, paste0(outputFolder, "/", stringr::str_interp("participant_basis_${dataset_name}.csv"))) # store file for logging purposes only + if (length(participant_basis) == 0 || nrow(participant_basis) == 0) { + stop("\nParticipant basis file does not include references for the expected recording IDs") + } + } else { + participant_basis = "" + } + # Run palmsplusr ---------------------------------------------------------- - fns = c(paste0(palmsplus_folder, "/", dataset_name, "_palmsplus.csv"), - paste0(palmsplus_folder, "/", dataset_name, "_days.csv"), - paste0(palmsplus_folder, "/", dataset_name, "_trajectories.csv"), - paste0(palmsplus_folder, "/", dataset_name, "_multimodal.csv")) + fns = c(paste0(outputFolder, "/", dataset_name, "_whenwhatwhere.csv"), + paste0(outputFolder, "/", dataset_name, "_days.csv"), + paste0(outputFolder, "/", dataset_name, "_trajectories.csv"), + paste0(outputFolder, "/", dataset_name, "_multimodal.csv")) for (fn in fns) { if (file.exists(fn)) file.remove(fn) } @@ -430,26 +430,26 @@ hbGIS <- function(gisdir = "", for (i in 1:Nlocations) { Nlocation_objects = c(Nlocation_objects, length(loca[[i]][[2]])) # at least a nbh object is expected #length(loca[[i]][[1]]), } - if (verbose) cat("\n<<< building palmsplus...\n") - if (length(palms) > 0 & length(palmsplus_fields) & + if (verbose) cat("\n<<< building whenwhatwhere...\n") + if (length(palms) > 0 & length(whenwhat_field) & all(Nlocation_objects > 0) & length(participant_basis) > 0) { - palmsplus <- build_hbGIS(data = palms, - palmsplus_fields = palmsplus_fields, + whenwhat <- build_whenwhatwhere(data = palms, + whenwhat_field = whenwhat_field, loca = loca, participant_basis = participant_basis, verbose = verbose) - write_csv(palmsplus, file = fns[1]) + write_csv(whenwhat, file = fns[1]) if 
(verbose) cat(">>>\n") } else { if (verbose) cat("skipped because insufficient input data>>>\n") } if (verbose) cat("\n<<< building days...") - if (length(palmsplus) > 0 & length(palmsplus_domains) > 0 & length(palmsplus_fields) & + if (length(whenwhat) > 0 & length(where_field) > 0 & length(whenwhat_field) & all(Nlocation_objects > 0) & length(participant_basis) > 0) { - days <- build_days(data = palmsplus, - palmsplus_domains = palmsplus_domains, - palmsplus_fields = palmsplus_fields, + days <- build_days(data = whenwhat, + where_field = where_field, + whenwhat_field = whenwhat_field, loca = loca, participant_basis = participant_basis, verbose = verbose) @@ -466,8 +466,8 @@ hbGIS <- function(gisdir = "", trajectory_locations = trajectory_locations[order(trajectory_locations$name),] if (verbose) cat("\n<<< building trajectories...\n") - if (length(palmsplus) > 0 & length(trajectory_fields) > 0) { - trajectories <- build_trajectories(data = palmsplus, + if (length(whenwhat) > 0 & length(trajectory_fields) > 0) { + trajectories <- build_trajectories(data = whenwhat, trajectory_fields = trajectory_fields, trajectory_locations = trajectory_locations) @@ -478,8 +478,8 @@ hbGIS <- function(gisdir = "", # browser() if (length(trajectories) > 0) { write_csv(trajectories, file = fns[3]) - shp_file = paste0(palmsplus_folder, "/", dataset_name, "_trajectories.shp") - if (writeshp == TRUE) { + shp_file = paste0(outputFolder, "/", dataset_name, "_trajectories.shp") + if (write_shp == TRUE) { if (file.exists(shp_file)) file.remove(shp_file) # remove because st_write does not know how to overwrite sf::st_write(obj = trajectories, dsn = shp_file) } @@ -492,18 +492,18 @@ hbGIS <- function(gisdir = "", if (verbose) cat("skipped because insufficient input data>>>\n") } if (verbose) cat("\n<<< building multimodal...\n") - if (length(palmsplus) > 0 & length(multimodal_fields) > 0 & length(trajectory_locations) > 0) { + if (length(whenwhat) > 0 & length(multimodal_fields) > 0 & 
length(trajectory_locations) > 0) { multimodal <- build_multimodal(data = trajectories, spatial_threshold = 200, temporal_threshold = 10, - palmsplus = palmsplus, + whenwhat = whenwhat, multimodal_fields = multimodal_fields, trajectory_locations = trajectory_locations, verbose = verbose) if (length(multimodal) > 0) { write_csv(multimodal, file = fns[4]) - shp_file = paste0(palmsplus_folder, "/", dataset_name, "_multimodal.shp") - if (writeshp == TRUE) { + shp_file = paste0(outputFolder, "/", dataset_name, "_multimodal.shp") + if (write_shp == TRUE) { if (file.exists(shp_file)) file.remove(shp_file) # remove because st_write does not know how to overwrite sf::st_write(obj = multimodal, dsn = shp_file) } diff --git a/R/load_params.R b/R/load_params.R index 3338276..53ffe8b 100644 --- a/R/load_params.R +++ b/R/load_params.R @@ -26,7 +26,7 @@ load_params = function(file=c()) { colnames(params_merged)[which(colnames(params_merged) == "formula")] = "value" # colnames(params_merged)[which(colnames(params_merged) == "id")] = "field" colnames(params_merged)[which(colnames(params_merged) == "name")] = "subfield" - expected_tsv_columns = c(expected_tsv_columns, "domain_field", "after_conversion") + expected_tsv_columns = c(expected_tsv_columns, "is_where_field", "after_conversion") params = params_merged[, expected_tsv_columns] params = params[,-which(colnames(params) %in% c("subfield", "id", "field"))] diff --git a/README.md b/README.md index e245635..e5d620f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,58 @@ ![GitHub Actions R-CMD-check](https://github.com/habitus-eu/hbGIS/workflows/R-CMD-check-full/badge.svg) -... documentation to follow \ No newline at end of file + +# hbGIS + +R package to analyse relationships between when, where and what behaviour takes place, by combining the output from package hbGPS (or old software library palms) and GIS files. +The code in this packages is an adaptation of the code in the R package palmsplusr. 
+In particular it has been modified to: +- Be built as a CRAN package +- Be used inside the Shiny app HabitusGUI +- Be used based on a configuration file +- Be used as a generic tool + + +## Installation + +``` +install.packages("remotes") +remotes::install_github("habitus-eu/hbGIS") +``` + +## Usage + +The code below shows an example of how to use hbGIS. + + +``` +library(hbGIS) +hbGIS(gisdir = "D:/myproject/GIS", # path to GIS file + palmsdir = "D:/myproject/hbGPSoutput", # path to palms or hbGPS output folder + gislinkfile = "D:/myproject/Tables/participant_basis.csv", # same as palmsplusr + outputdir = "D:/myproject/", # path to output folder + dataset_name = "myproject", # dataset name + configfile = "D:/myproject/config_hbGIS.csv", # hbGIS config file (see note below) + baselocation = "home", # base for individuals (leave empty if not available) + groupinglocation = "school", # grouping for individuals (leave empty if not available) + write_shp = FALSE, # whether to store shape files as output + split_GIS = TRUE, # whether to split GIS files in sublocations (only for public places) + sublocationID = "ID_NR") # column name in GIS file to identify sublocation + +``` + +Note: +- GIS filenames are used as location names and at the moment the code can only handle names that are shorter than 6 characters. 
+- Example config file can be found [here](https://github.com/habitus-eu/hbGIS/blob/main/inst/testfiles_hbGIS/config_hbGIS.csv) + +## Output + +hbGIS will create four output files: + +Name | Content +----------------------------|---------------- +$datasetname_whenwhatwhere.csv | Time series with information about when what where happened +$datasetname_days.csv | Day level summaries +$datasetname_trajectories.csv | Trajectory based summaries +$datasetname_multimodal.csv | Breakdown of trajectories by mode of transport + + diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index e1938be..3feb7fa 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -5,5 +5,7 @@ \itemize{ \item First release, based on code from the HabitusGUI and palmsplusr libraries \item Expansion with ability to handle public locations not tight to an individual + \item Revise terminology from domains and fields to when, what, and where + \item Export formula log to clarify to user which formulas were used } } diff --git a/inst/dev_code_hbGIS.R b/inst/dev_code_hbGIS.R index ead1284..e53130f 100644 --- a/inst/dev_code_hbGIS.R +++ b/inst/dev_code_hbGIS.R @@ -23,24 +23,22 @@ library(lwgeom) # gislinkfile = NULL, # outputdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/JasperNov2023", # dataset_name = "JasperNov2023", -# configfile = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/NBBB2010/config_palmsplusr.csv") -# kkk +# configfile = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/NBBB2010/config_hbGIS.csv") # hbGIS(gisdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/NBBB2010/GIS", palmsdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/NBBB2010/hbGPSoutput", gislinkfile = NULL, # "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/NBBB2010/Tables/participant_basis.csv", outputdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/NBBB2010", dataset_name = "NBBB2010", - configfile = 
"D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/NBBB2010/config_palmsplusr.csv") -kkk + configfile = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/NBBB2010/config_hbGIS.csv") # Belgium dataset -hbGIS(gisdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata/GIS/", - palmsdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata/hbGPSoutput", - gislinkfile = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata/Tables/participant_basis.csv", - outputdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata", - dataset_name = "BEtestdata", - configfile = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata/config_palmsplusr.csv") +# hbGIS(gisdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata/GIS/", +# palmsdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata/hbGPSoutput", +# gislinkfile = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata/Tables/participant_basis.csv", +# outputdir = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata", +# dataset_name = "BEtestdata", +# configfile = "D:/Dropbox/Work/sharedfolder/DATA/Habitus/GPSprocessing/BEtestdata/config_hbGIS.csv") jkjj # hbGIS(gisdir = "/media/vincent/projects/Habitus/palmsplusr/testdata/GIS", # palmsdir = "/media/vincent/projects/Habitus/palmsplusr/testdata/PALMS_output/", diff --git a/inst/testfiles_hbGIS/config_hbGIS.csv b/inst/testfiles_hbGIS/config_hbGIS.csv index 32a1d04..b114d55 100644 --- a/inst/testfiles_hbGIS/config_hbGIS.csv +++ b/inst/testfiles_hbGIS/config_hbGIS.csv @@ -1,47 +1,46 @@ -"context","name","formula","domain_field","after_conversion" -"palmsplus_field","weekday","dow < 6",FALSE,NA -"palmsplus_field","weekend","dow > 5",FALSE,NA -"palmsplus_field","indoors","iov == 3",FALSE,NA -"palmsplus_field","outdoors","iov == 1",FALSE,NA -"palmsplus_field","in_vehicle","iov == 2",FALSE,NA 
-"palmsplus_field","inserted","fixtypecode == 6",FALSE,NA -"palmsplus_field","pedestrian","tripmot == 1",FALSE,NA -"palmsplus_field","bicycle","tripmot == 2",FALSE,NA -"palmsplus_field","vehicle","tripmot == 3",FALSE,NA -"palmsplus_field","nonwear","activityintensity < 0",TRUE,NA -"palmsplus_field","wear","activityintensity >= 0",TRUE,NA -"palmsplus_field","sedentary","activityintensity == 0",TRUE,NA -"palmsplus_field","light","activityintensity == 1",TRUE,NA -"palmsplus_field","moderate","activityintensity == 2",TRUE,NA -"palmsplus_field","vigorous","activityintensity == 3",TRUE,NA -"palmsplus_field","mvpa","moderate + vigorous",TRUE,NA -"trajectory_field","mot","first(tripmot)",NA,FALSE -"trajectory_field","date","first(as.Date(datetime))",NA,FALSE -"trajectory_field","start","datetime[triptype==1]",NA,FALSE -"trajectory_field","end","datetime[triptype==4]",NA,FALSE -"trajectory_field","duration","as.numeric(difftime(end, start, units = ""secs"") + 30)",NA,FALSE -"trajectory_field","nonwear","sum(activityintensity < 0) * 15",NA,FALSE -"trajectory_field","wear","sum(activityintensity >= 0) * 15",NA,FALSE -"trajectory_field","sedentary","sum(activityintensity == 1) * 15",NA,FALSE -"trajectory_field","light","sum(activityintensity == 1) * 15",NA,FALSE -"trajectory_field","moderate","sum(activityintensity == 2) * 15",NA,FALSE -"trajectory_field","vigorous","sum(activityintensity == 3) * 15",NA,FALSE -"trajectory_field","mvpa","moderate + vigorous",NA,FALSE -"trajectory_field","length","as.numeric(st_length(geometry))",NA,TRUE -"trajectory_field","speed","(length / duration) * 3.6",NA,TRUE -"multimodal_field","duration","sum",NA,NA -"multimodal_field","nonwear","sum",NA,NA -"multimodal_field","wear","sum",NA,NA -"multimodal_field","sedentary","sum",NA,NA -"multimodal_field","light","sum",NA,NA -"multimodal_field","moderate","sum",NA,NA -"multimodal_field","vigorous","sum",NA,NA -"multimodal_field","mvpa","sum",NA,NA -"multimodal_field","length","sum",NA,NA 
-"multimodal_field","speed","mean",NA,NA -"palmsplus_domain","home","at_home",TRUE,NA -"palmsplus_domain","school","(!at_home & at_school)",TRUE,NA -"palmsplus_domain","transport","!at_home & !(at_school) & (pedestrian | bicycle | vehicle)",TRUE,NA -"palmsplus_domain","home_nbh","!at_home & !(at_school) & (!pedestrian & !bicycle & !vehicle) & at_home_nbh",TRUE,NA -"palmsplus_domain","school_nbh","!at_home & !(at_school) & (!pedestrian & !bicycle & !vehicle) & !(at_home_nbh) & at_school_nbh",TRUE,NA -"palmsplus_domain","other","!at_home & !(at_school) & (!pedestrian & !bicycle & !vehicle) & !(at_home_nbh) & !(at_school_nbh)",TRUE,NA +context,name,formula,is_where_field,after_conversion +whenwhat_field,weekday,dow < 6,FALSE,NA +whenwhat_field,weekend,dow > 5,FALSE,NA +whenwhat_field,indoors,iov == 3,FALSE,NA +whenwhat_field,outdoors,iov == 1,FALSE,NA +whenwhat_field,in_vehicle,iov == 2,FALSE,NA +whenwhat_field,inserted,fixtypecode == 6,FALSE,NA +whenwhat_field,pedestrian,tripmot == 1,FALSE,NA +whenwhat_field,bicycle,tripmot == 2,FALSE,NA +whenwhat_field,vehicle,tripmot == 3,FALSE,NA +whenwhat_field,nonwear,activityintensity < 0,TRUE,NA +whenwhat_field,wear,activityintensity >= 0,TRUE,NA +whenwhat_field,sedentary,activityintensity == 0,TRUE,NA +whenwhat_field,light,activityintensity == 1,TRUE,NA +whenwhat_field,moderate,activityintensity == 2,TRUE,NA +whenwhat_field,vigorous,activityintensity == 3,TRUE,NA +whenwhat_field,mvpa,moderate + vigorous,TRUE,NA +trajectory_field,mot,first(tripmot),NA,FALSE +trajectory_field,date,first(as.Date(datetime)),NA,FALSE +trajectory_field,start,datetime[triptype==1],NA,FALSE +trajectory_field,end,datetime[triptype==4],NA,FALSE +trajectory_field,duration,"as.numeric(difftime(end, start, units = ""secs"") + 30)",NA,FALSE +trajectory_field,nonwear,sum(activityintensity < 0) * 15,NA,FALSE +trajectory_field,wear,sum(activityintensity >= 0) * 15,NA,FALSE +trajectory_field,sedentary,sum(activityintensity == 1) * 15,NA,FALSE 
+trajectory_field,light,sum(activityintensity == 1) * 15,NA,FALSE +trajectory_field,moderate,sum(activityintensity == 2) * 15,NA,FALSE +trajectory_field,vigorous,sum(activityintensity == 3) * 15,NA,FALSE +trajectory_field,mvpa,moderate + vigorous,NA,FALSE +trajectory_field,length,as.numeric(st_length(geometry)),NA,TRUE +trajectory_field,speed,(length / duration) * 3.6,NA,TRUE +multimodal_field,duration,sum,NA,NA +multimodal_field,nonwear,sum,NA,NA +multimodal_field,wear,sum,NA,NA +multimodal_field,sedentary,sum,NA,NA +multimodal_field,light,sum,NA,NA +multimodal_field,moderate,sum,NA,NA +multimodal_field,vigorous,sum,NA,NA +multimodal_field,mvpa,sum,NA,NA +multimodal_field,length,sum,NA,NA +multimodal_field,speed,mean,NA,NA +general,groupinglocation,school,NA,NA +general,baselocation,home,NA,NA +general,write_shp,FALSE,NA,NA +general,split_GIS,TRUE,NA,NA +general,sublocationID,OBJECTID,NA,NA diff --git a/inst/testfiles_hbGIS/loc_park.CPG b/inst/testfiles_hbGIS/loc_park.CPG new file mode 100644 index 0000000..3ad133c --- /dev/null +++ b/inst/testfiles_hbGIS/loc_park.CPG @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/inst/testfiles_hbGIS/loc_park.dbf b/inst/testfiles_hbGIS/loc_park.dbf new file mode 100644 index 0000000..13dfe5b Binary files /dev/null and b/inst/testfiles_hbGIS/loc_park.dbf differ diff --git a/inst/testfiles_hbGIS/loc_park.prj b/inst/testfiles_hbGIS/loc_park.prj new file mode 100644 index 0000000..f45cbad --- /dev/null +++ b/inst/testfiles_hbGIS/loc_park.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] \ No newline at end of file diff --git a/inst/testfiles_hbGIS/loc_park.sbn b/inst/testfiles_hbGIS/loc_park.sbn new file mode 100644 index 0000000..22ad9f3 Binary files /dev/null and b/inst/testfiles_hbGIS/loc_park.sbn differ diff --git a/inst/testfiles_hbGIS/loc_park.sbx b/inst/testfiles_hbGIS/loc_park.sbx new file mode 
100644 index 0000000..28c40df Binary files /dev/null and b/inst/testfiles_hbGIS/loc_park.sbx differ diff --git a/inst/testfiles_hbGIS/loc_park.shp b/inst/testfiles_hbGIS/loc_park.shp new file mode 100644 index 0000000..b9a4cd1 Binary files /dev/null and b/inst/testfiles_hbGIS/loc_park.shp differ diff --git a/inst/testfiles_hbGIS/loc_park.shx b/inst/testfiles_hbGIS/loc_park.shx new file mode 100644 index 0000000..0f98733 Binary files /dev/null and b/inst/testfiles_hbGIS/loc_park.shx differ diff --git a/inst/testfiles_hbGIS/params_description_hbGIS.tsv b/inst/testfiles_hbGIS/params_description_hbGIS.tsv index 73f1211..eac75b2 100644 --- a/inst/testfiles_hbGIS/params_description_hbGIS.tsv +++ b/inst/testfiles_hbGIS/params_description_hbGIS.tsv @@ -1,24 +1,24 @@ parameter field subfield display class minimum maximum set priority description -weekday palmsplus_field TRUE formula 0 description missing -weekend palmsplus_field TRUE formula 0 description missing -indoors palmsplus_field TRUE formula 0 description missing -outdoors palmsplus_field TRUE formula 0 description missing -in_vehicle palmsplus_field TRUE formula 0 description missing -inserted palmsplus_field TRUE formula 0 description missing -pedestrian palmsplus_field TRUE formula 0 description missing -bicycle palmsplus_field TRUE formula 0 description missing -vehicle palmsplus_field TRUE formula 0 description missing -nonwear palmsplus_field TRUE formula 0 description missing -wear palmsplus_field TRUE formula 0 description missing -sedentary palmsplus_field TRUE formula 0 description missing -light palmsplus_field TRUE formula 0 description missing -moderate palmsplus_field TRUE formula 0 description missing -vigorous palmsplus_field TRUE formula 0 description missing -mvpa palmsplus_field TRUE formula 0 description missing -at_home palmsplus_field TRUE formula 0 description missing -at_school palmsplus_field TRUE formula 0 description missing -at_home_nbh palmsplus_field TRUE formula 0 description missing 
-at_school_nbh palmsplus_field TRUE formula 0 description missing +weekday whenwhat_field TRUE formula 0 description missing +weekend whenwhat_field TRUE formula 0 description missing +indoors whenwhat_field TRUE formula 0 description missing +outdoors whenwhat_field TRUE formula 0 description missing +in_vehicle whenwhat_field TRUE formula 0 description missing +inserted whenwhat_field TRUE formula 0 description missing +pedestrian whenwhat_field TRUE formula 0 description missing +bicycle whenwhat_field TRUE formula 0 description missing +vehicle whenwhat_field TRUE formula 0 description missing +nonwear whenwhat_field TRUE formula 0 description missing +wear whenwhat_field TRUE formula 0 description missing +sedentary whenwhat_field TRUE formula 0 description missing +light whenwhat_field TRUE formula 0 description missing +moderate whenwhat_field TRUE formula 0 description missing +vigorous whenwhat_field TRUE formula 0 description missing +mvpa whenwhat_field TRUE formula 0 description missing +at_home whenwhat_field TRUE formula 0 description missing +at_school whenwhat_field TRUE formula 0 description missing +at_home_nbh whenwhat_field TRUE formula 0 description missing +at_school_nbh whenwhat_field TRUE formula 0 description missing mot trajectory_field TRUE formula 0 description missing date trajectory_field TRUE formula 0 description missing start trajectory_field TRUE formula 0 description missing @@ -47,9 +47,8 @@ vigorous multimodal_field TRUE formula 0 description missing mvpa multimodal_field TRUE formula 0 description missing length multimodal_field TRUE formula 0 description missing speed multimodal_field TRUE formula 0 description missing -home palmsplus_domain TRUE formula 0 description missing -school palmsplus_domain TRUE formula 0 description missing -transport palmsplus_domain TRUE formula 0 description missing -home_nbh palmsplus_domain TRUE formula 0 description missing -school_nbh palmsplus_domain TRUE formula 0 description missing -other 
palmsplus_domain TRUE formula 0 description missing +groupinglocation general TRUE character 0 description missing +baselocation general TRUE character 0 description missing +write_shp general TRUE set TRUE;FALSE 0 description missing +split_GIS general TRUE set TRUE;FALSE 0 description missing +sublocationID general TRUE character 0 description missing diff --git a/man/build_days.Rd b/man/build_days.Rd index d977381..d71ef80 100644 --- a/man/build_days.Rd +++ b/man/build_days.Rd @@ -2,42 +2,35 @@ % Please edit documentation in R/build_days.R \name{build_days} \alias{build_days} -\title{Calculate day-level summaries from the palmsplus dataset} +\title{Calculate day-level summaries from the whenwhat dataset} \usage{ build_days( data = NULL, verbose = TRUE, - palmsplus_domains = NULL, - palmsplus_fields = NULL, + where_field = NULL, + whenwhat_field = NULL, loca = NULL, participant_basis = NULL ) } \arguments{ -\item{data}{The palmsplus data obtained from \code{\link{palms_build_palmsplus}}.} +\item{data}{The whenwhat data obtained from \code{\link{build_whenwhatwhere}}.} \item{verbose}{Print progress to console. Default is \code{TRUE}.} -\item{palmsplus_domains}{...} +\item{where_field}{...} -\item{palmsplus_fields}{...} +\item{whenwhat_field}{...} \item{loca}{Nested list with location information} -\item{participant_basis}{participant_basis} +\item{participant_basis}{participant_basis +#'} } \value{ A table summarised by day. } \description{ -Build a days dataset by summarising \code{palmsplus} -by day and person (\code{identifier}). Not all variables in \code{palmsplus} -are summarised, only those specified using \code{\link{palms_add_field}} with -the argument \code{domain_field = TRUE}. By default, a \code{duration} field -is added (e.g., the total minutes per day). - -All data are summarised by default. However, additional aggragation \emph{domains} -can be specified using \code{\link{palms_add_domain}} before building days. 
-Domains are a subset of data, such as during school time. All \code{domain_field} -variables will be summarised for each \emph{domain} seperatly. +Build a days dataset by summarising \code{whenwhat} +by day and person (\code{identifier}). } diff --git a/man/build_multimodal.Rd b/man/build_multimodal.Rd index cfc362c..2afee10 100644 --- a/man/build_multimodal.Rd +++ b/man/build_multimodal.Rd @@ -8,7 +8,7 @@ build_multimodal( data = NULL, spatial_threshold, temporal_threshold, - palmsplus = NULL, + whenwhat = NULL, verbose = TRUE, multimodal_fields = NULL, trajectory_locations = NULL @@ -21,7 +21,7 @@ build_multimodal( \item{temporal_threshold}{Temporal threshold in minutes} -\item{palmsplus}{The dataset build by \code{build_hbGIS}} +\item{whenwhat}{The dataset build by \code{build_hbGIS}} \item{verbose}{Print progress after each step. Default is \code{TRUE}.} diff --git a/man/build_hbGIS.Rd b/man/build_whenwhatwhere.Rd similarity index 53% rename from man/build_hbGIS.Rd rename to man/build_whenwhatwhere.Rd index 7863652..f997f1b 100644 --- a/man/build_hbGIS.Rd +++ b/man/build_whenwhatwhere.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/build_hbGIS.R -\name{build_hbGIS} -\alias{build_hbGIS} -\title{Build the hbGIS dataset} +% Please edit documentation in R/build_whenwhatwhere.R +\name{build_whenwhatwhere} +\alias{build_whenwhatwhere} +\title{build_whenwhatwhere} \usage{ -build_hbGIS( +build_whenwhatwhere( data = NULL, verbose = TRUE, - palmsplus_fields = NULL, + whenwhat_field = NULL, loca = NULL, participant_basis = NULL ) @@ -17,13 +17,13 @@ build_hbGIS( \item{verbose}{Print progress to console after each iteration. 
Default is \code{TRUE}.} -\item{palmsplus_fields}{palmsplus_fields defined in hbGIS} +\item{whenwhat_field}{whenwhat_field defined in hbGIS} \item{loca}{Nested list with location information} \item{participant_basis}{participant_basis} } \description{ -Build the \code{hbGIS} dataset by adding additional columns to the hbGPS output data. -The additional columns are specified using \code{\link{palms_add_field}}. +Build the whenwhat dataset by adding additional columns to the +hbGPS output data based on whenwhat_field. } diff --git a/man/hbGIS.Rd b/man/hbGIS.Rd index 5fa9459..b9eee7b 100644 --- a/man/hbGIS.Rd +++ b/man/hbGIS.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/hbGIS.R \name{hbGIS} \alias{hbGIS} -\title{palmsplusr_shiny} +\title{hbGIS} \usage{ hbGIS( gisdir = "", @@ -10,8 +10,13 @@ hbGIS( gislinkfile = "", outputdir = "", dataset_name = "", - configfile = "", - verbose = TRUE + configfile = NULL, + verbose = TRUE, + baselocation = NULL, + groupinglocation = NULL, + write_shp = NULL, + split_GIS = NULL, + sublocationID = NULL ) } \arguments{ @@ -28,10 +33,20 @@ hbGIS( \item{configfile}{Configuration file} \item{verbose}{verbose Boolean} + +\item{baselocation}{character, to specify reference location for individuals, e.g. home} + +\item{groupinglocation}{character, to specify reference location location for groups, e.g. 
school} + +\item{write_shp}{boolean, to indicate whether shape file should be written} + +\item{split_GIS}{boolean, to indicate whether sublocation inside GIS files are to be split} + +\item{sublocationID}{character, GIS column name to be used as identifier for sublocations} } \value{ palms_to_clean_lower object } \description{ -palmsplusr_shiny +hbGIS } diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..a7f999e --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,3 @@ +library("testthat") +library("hbGIS") +test_check("hbGIS") \ No newline at end of file diff --git a/tests/testthat/test_hbGIS.R b/tests/testthat/test_hbGIS.R new file mode 100644 index 0000000..3a2b105 --- /dev/null +++ b/tests/testthat/test_hbGIS.R @@ -0,0 +1,81 @@ +library(hbGIS) +context("hbGIS pipeline") +test_that("hbGIS pipeline process file", { + # + # # Prepare input data + # GIS_files = dir(system.file("testfiles_hbGIS", package = "hbGIS"), full.names = TRUE, pattern = "loc_") + # dn1 = "./GIS" + # if (!dir.exists(dn1)) { + # dir.create(dn1, recursive = TRUE) + # } + # for (fn in GIS_files) file.copy(from = fn, to = dn1) + # + # hbGPSoutput_file = system.file("testfiles_hbGIS/hbGPS_out.csv", package = "hbGIS") + # dn2 = "./hbGPSoutput" + # if (!dir.exists(paths = dn2)) { + # dir.create(path = dn2) + # } + # file.copy(from = hbGPSoutput_file, to = dn2) + # + # # Prepare output folder + # outdir = "./output" + # if (!dir.exists(outdir)) { + # dir.create(outdir, recursive = TRUE) + # } + # + # # Run pipeline + # hbGIS(gisdir = "./GIS", + # palmsdir = dn2, # note: function will simulat palms data if palmsdir not provided + # gislinkfile = NULL, + # outputdir = outdir, + # dataset_name = "test", + # verbose = FALSE, + # sublocationID = "ignore") + # + # # Check days + # file = paste0(outdir, "/hbGIS_output/test_days.csv") + # expect_true(file.exists(file)) + # test_days = read.csv(file) + # expect_equal(length(test_days), 37) + # expect_equal(sum(test_days[, 
3:ncol(test_days)], na.rm = TRUE), 24573.25) + # + # # Check whenwhatwhere + # file = paste0(outdir, "/hbGIS_output/test_whenwhatwhere.csv") + # expect_true(file.exists(file)) + # test_whenwhatwhere = read.csv(file) + # expect_equal(nrow(test_whenwhatwhere), 21523) + # expect_equal(ncol(test_whenwhatwhere), 30) + # expect_equal(sum(test_whenwhatwhere[, 3:(ncol(test_whenwhatwhere) - 1)]), 558765.2) + # + # # Check trajectories + # file = paste0(outdir, "/hbGIS_output/test_trajectories.csv") + # expect_true(file.exists(file)) + # test_trajectories = read.csv(file) + # expect_equal(nrow(test_trajectories), 50) + # expect_equal(ncol(test_trajectories), 18) + # expect_equal(sum(test_trajectories$length), 21993.49, tol = 0.1) + # expect_equal(sum(test_trajectories$speed), 212.9411, tol = 0.001) + # + # + # # Check multimodal + # file = paste0(outdir, "/hbGIS_output/test_multimodal.csv") + # expect_true(file.exists(file)) + # test_multimodal = read.csv(file) + # expect_equal(nrow(test_multimodal), 2) + # expect_equal(ncol(test_multimodal), 31) + # expect_equal(test_multimodal$trip_numbers, c(1, 2)) + # expect_equal(test_multimodal$start, c("2023-11-30T13:50:00Z", "2023-11-30T14:25:00Z")) + # expect_equal(test_multimodal$end, c("2023-11-30T13:54:00Z", "2023-11-30T14:29:00Z")) + # + # # Check formula_log + # file = paste0(outdir, "/hbGIS_output/formula_log.csv") + # expect_true(file.exists(file)) + # test_formula_log = read.csv(file) + # expect_equal(nrow(test_formula_log), 45) + # expect_equal(ncol(test_formula_log), 7) + # + # # Clean up + # if (dir.exists(dn)) unlink(dn, recursive = TRUE) + # if (dir.exists(outdir)) unlink(outdir, recursive = TRUE) + # if (file.exists("./output_test_error_list.csv")) file.remove("./output_test_error_list.csv") +})