From a163991ffb388dd1e7f6ecabfffba3c563f26de2 Mon Sep 17 00:00:00 2001 From: SpatLyu Date: Thu, 20 Jun 2024 20:27:33 +0800 Subject: [PATCH] test spade pseudo pvalue calculation --- NAMESPACE | 4 ++ R/globals.R | 2 +- R/pseudop.R | 163 +++++++++++++++++++++++++++++++++++++++--- R/spEcula_helper.R | 22 ++++++ man/psd_pseudop.Rd | 38 ++++++++++ man/psmd_pseudop.Rd | 79 ++++++++++++++++++++ man/shuffle_vector.Rd | 21 ++++++ 7 files changed, 320 insertions(+), 9 deletions(-) create mode 100644 man/psd_pseudop.Rd create mode 100644 man/psmd_pseudop.Rd create mode 100644 man/shuffle_vector.Rd diff --git a/NAMESPACE b/NAMESPACE index 872e080a..d7155df3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,11 +17,14 @@ export(gozh) export(interaction_detector) export(inverse_distance_weight) export(opgd) +export(psd_pseudop) export(psd_spade) +export(psmd_pseudop) export(psmd_spade) export(rgd) export(risk_detector) export(robust_disc) +export(shuffle_vector) export(spade) export(spvar) export(st_unidisc) @@ -60,6 +63,7 @@ importFrom(stats,as.dist) importFrom(stats,as.formula) importFrom(stats,dist) importFrom(stats,pf) +importFrom(stats,runif) importFrom(stats,t.test) importFrom(stats,var) importFrom(tibble,as_tibble) diff --git a/R/globals.R b/R/globals.R index d8868589..1376dda5 100644 --- a/R/globals.R +++ b/R/globals.R @@ -3,4 +3,4 @@ utils::globalVariables(c(".", "Ecological", "Interaction", "Interactive variable", "Q-statistic", "Risk", "qstatistic", "variable", "variable1", - "variable2", "x", "zone1", "zone2", "id_sample")) + "variable2", "x", "zone1", "zone2", "id_sample_new")) diff --git a/R/pseudop.R b/R/pseudop.R index 831f0d70..5c33160d 100644 --- a/R/pseudop.R +++ b/R/pseudop.R @@ -1,8 +1,155 @@ -# pseudo_p = \(qs, -# alpha = 0.05, -# permutations = 999, -# permutation_method = "complete", -# seed = 123456789){ -# M = permutations -# pp = (R + 1) / (M + 1) -# } +#' @title calculate power of spatial determinant(PSD) and the corresponding pseudo-p value +#' @author 
Wenbo Lv \email{lyu.geosocial@gmail.com} +#' @description +#' Function for calculate power of spatial determinant \eqn{q_s}. +#' @details +#' The power of spatial determinant formula is +#' \eqn{q_s = 1 - \frac{\sum_{h=1}^L N_h \Gamma_h}{N \Gamma}} +#' +#' @references +#' Xuezhi Cang & Wei Luo (2018) Spatial association detector (SPADE),International +#' Journal of Geographical Information Science, 32:10, 2055-2075, DOI: 10.1080/13658816.2018.1476693 +#' +#' @param y Variable Y, continuous numeric vector. +#' @param x Covariable X, \code{factor}, \code{character} or \code{discrete numeric}. +#' @param wt The spatial weight matrix. +#' @param cores (optional) A positive integer(default is 6). If cores > 1, use parallel computation. +#' @param seed (optional) Random seed number, default is `123456789`. +#' @param permutations (optional) The number of permutations for the PSD computation. Default is `99`. +#' +#' @return A list of power of spatial determinant and the corresponding pseudo-p value. 
+#' @importFrom stats runif
+#' @export
+#'
+psd_pseudop = \(y,x,wt,cores = 6,
+                seed = 123456789,
+                permutations = 99){
+  set.seed(seed)
+  permutation = stats::runif(permutations, min = 0, max = 1)  # one shuffle rate per permutation
+  qs = psd_spade(y,x,wt)  # observed statistic on the unshuffled `x`
+
+  # each call passes the same `seed` to `shuffle_vector()`, so draws differ only through `p`
+  calcul_psd = \(p){
+    x_shuffled = shuffle_vector(x,p,seed = seed)
+    return(psd_spade(y,x_shuffled,wt))
+  }
+
+  if (cores > 1) {
+    # the cluster handle gets its own name so that `cores` stays the worker count
+    cl = parallel::makeCluster(cores)
+    on.exit(parallel::stopCluster(cl), add=TRUE)
+    # look the helpers up from this function's scope, not the global env (`clusterExport()` default)
+    parallel::clusterExport(cl,c('st_unidisc','robust_disc','spvar',
+                                 'psd_spade',"shuffle_vector"),
+                            envir = environment())
+    out_g = parallel::parLapply(cl,permutation,calcul_psd)
+    out_g = as.numeric(do.call(rbind, out_g))
+  } else {
+    out_g = purrr::map_dbl(permutation,calcul_psd)
+  }
+
+  # pseudo-p: share of permuted statistics at least as large as the observed one (+1 correction)
+  R = sum(out_g >= qs)
+  pp = (R + 1) / (permutations + 1)
+  fd = list("Q-statistic" = qs, "P-value" = pp)
+  return(fd)
+}
+
+#' @title power of spatial and multilevel discretization determinant(PSMD) and the corresponding pseudo-p value
+#' @author Wenbo Lv \email{lyu.geosocial@gmail.com}
+#' @description
+#' Function for calculate power of spatial and multilevel discretization determinant and the corresponding pseudo-p value.
+#' @details
+#' The power of spatial and multilevel discretization determinant formula is
+#' \eqn{PSMDQ_s = MEAN(Q_s)}
+#'
+#' @references
+#' Xuezhi Cang & Wei Luo (2018) Spatial association detector (SPADE),International
+#' Journal of Geographical Information Science, 32:10, 2055-2075, DOI: 10.1080/13658816.2018.1476693
+#'
+#' @param formula A formula of calculate power of spatial and multilevel discretization determinant \eqn{PSMDQ_s}.
+#' @param data A data.frame or tibble of observation data.
+#' @param wt (optional) The spatial weight matrix.When `wt` is not provided, must provide `locations`.
+#' And `gdverse` will use `locations` columns to construct spatial weight use `inverse_distance_weight()`.
+#' @param locations (optional) The geospatial locations coordinate columns name which in `data`.
+#' Useful and must provided when `wt` is not provided.
+#' @param discnum (optional) Number of multilevel discretization.Default will use `3:15`.
+#' @param discmethod (optional) The discretization methods. Default will use `quantile`.
+#' When `discmethod` is `robust` use `robust_disc()`, others use `st_unidisc()`.Now only support
+#' one `discmethod` at one time.
+#' @param cores (optional) A positive integer(default is 6). If cores > 1, use parallel computation.
+#' @param seed (optional) Random seed number, default is `123456789`.
+#' @param permutations (optional) The number of permutations for the PSD computation. Default is `99`.
+#' @param ... (optional) Other arguments passed to `st_unidisc()` or `robust_disc()`.
+#'
+#' @return A list of power of spatial and multilevel discretization determinant and the corresponding pseudo-p value.
+#' @importFrom stats runif
+#' @export
+#' @examples
+#' \dontrun{
+#' library(sf)
+#' usfi = read_sf(system.file('extdata/USFI_Xian.gpkg',package = 'gdverse')) |>
+#'   dplyr::select(dplyr::all_of(c("NDVI","BH","SUHI")))
+#' coord = usfi |>
+#'   st_centroid() |>
+#'   st_coordinates()
+#' usfi = usfi |>
+#'   dplyr::bind_cols(coord) |>
+#'   st_drop_geometry()
+#' tictoc::tic()
+#' psmd_pseudop('SUHI ~ BH',data = dplyr::select(usfi,SUHI,BH,X,Y),
+#'              locations = c('X','Y'),cores = 6)
+#' tictoc::toc()
+#' }
+#'
+psmd_pseudop = \(formula,data,wt = NULL,locations = NULL,discnum = NULL,discmethod = NULL,
+                 cores = 6,seed = 123456789,permutations = 99, ...){
+  set.seed(seed)
+  permutation = stats::runif(permutations, min = 0, max = 1)  # one shuffle rate per permutation
+  qs = psmd_spade(formula,data,wt,locations,discnum,discmethod,cores,seed,...)
+
+  formula = stats::as.formula(formula)
+  formula.vars = all.vars(formula)
+  # `setdiff()` rather than `v[-which(cond)]`: with no match `which()` is empty and the
+  # negative index then drops every element, which left `xname` empty for a plain `y ~ x`
+  if (formula.vars[2] == "."){
+    xname = setdiff(colnames(data),c(formula.vars[1],locations))
+    # exactly one column may remain besides the response and the coordinate columns
+    if (length(xname) != 1) {
+      stop('please only keep `dependent` and `independent` columns in `data`; When `wt` is not provided, please make sure `locations` coordinate columns is also contained in `data` .')
+    }
+  } else {
+    xname = setdiff(formula.vars[2],c(formula.vars[1],locations))
+  }
+
+  xobs = data[,xname,drop = TRUE]
+  # each call passes the same `seed` to `shuffle_vector()`, so draws differ only through `p`
+  calcul_psmd = \(p){
+    data[,xname] = shuffle_vector(xobs,p,seed = seed)
+    return(psmd_spade(formula,data,wt,locations,discnum,discmethod,cores=1,seed,...))
+  }
+
+  if (cores > 1) {
+    # the cluster handle gets its own name so that `cores` stays the worker count
+    cl = parallel::makeCluster(cores)
+    on.exit(parallel::stopCluster(cl), add=TRUE)
+    # look the helpers up from this function's scope, not the global env (`clusterExport()` default)
+    parallel::clusterExport(cl,c('st_unidisc','robust_disc','spvar','shuffle_vector',
+                                 'psd_spade','cpsd_spade','psmd_spade',
+                                 'inverse_distance_weight'),
+                            envir = environment())
+    out_g = parallel::parLapply(cl,permutation,calcul_psmd)
+    out_g = as.numeric(do.call(rbind, out_g))
+  } else {
+    out_g = purrr::map_dbl(permutation,calcul_psmd)
+  }
+
+  # pseudo-p: share of permuted statistics at least as large as the observed one (+1 correction)
+  R = sum(out_g >= qs)
+  pp = (R + 1) / (permutations + 1)
+  fd = list("Q-statistic" = qs, "P-value" = pp)
+  return(fd)
+}
+
+
+
diff --git a/R/spEcula_helper.R b/R/spEcula_helper.R
index c7e92892..3553a978 100644
--- a/R/spEcula_helper.R
+++ b/R/spEcula_helper.R
@@ -32,3 +32,25 @@ inverse_distance_weight = \(locx,locy,power = 1,is_arc = FALSE){
   wij = 1 / distij ^ power
   return(as.matrix(wij))
 }
+
+#' @title randomly shuffling vector
+#'
+#' @param x A vector.
+#' @param shuffle_rate The shuffling rate.
+#' @param seed (optional) Random seed number. Default is `123456789`.
+#'
+#' @return A shuffled vector.
+#' @export
+#'
+shuffle_vector = \(x,shuffle_rate,seed = 123456789){
+  # seeding comes first so the RNG state is reset even when nothing gets moved
+  set.seed(seed)
+  total = length(x)
+  n_move = floor(total * shuffle_rate)
+  # a rate that rounds down to zero elements leaves `x` untouched
+  if (n_move == 0) return(x)
+  # NOTE(review): picked elements are moved to the front and the rest keep their order;
+  # confirm this (rather than permuting the picked positions in place) is the intended shuffle
+  picked = sample(seq_len(total), size = n_move)
+  c(x[picked], x[-picked])
+}
diff --git a/man/psd_pseudop.Rd b/man/psd_pseudop.Rd
new file mode 100644
index 00000000..be2af19e
--- /dev/null
+++ b/man/psd_pseudop.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pseudop.R
+\name{psd_pseudop}
+\alias{psd_pseudop}
+\title{calculate power of spatial determinant(PSD) and the corresponding pseudo-p value}
+\usage{
+psd_pseudop(y, x, wt, cores = 6, seed = 123456789, permutations = 99)
+}
+\arguments{
+\item{y}{Variable Y, continuous numeric vector.}
+
+\item{x}{Covariable X, \code{factor}, \code{character} or \code{discrete numeric}.}
+
+\item{wt}{The spatial weight matrix.}
+
+\item{cores}{(optional) A positive integer(default is 6). If cores > 1, use parallel computation.}
+
+\item{seed}{(optional) Random seed number, default is \code{123456789}.}
+
+\item{permutations}{(optional) The number of permutations for the PSD computation. Default is \code{99}.}
+}
+\value{
+A list of power of spatial determinant and the corresponding pseudo-p value.
+}
+\description{
+Function for calculate power of spatial determinant \eqn{q_s}.
+} +\details{ +The power of spatial determinant formula is +\eqn{q_s = 1 - \frac{\sum_{h=1}^L N_h \Gamma_h}{N \Gamma}} +} +\references{ +Xuezhi Cang & Wei Luo (2018) Spatial association detector (SPADE),International +Journal of Geographical Information Science, 32:10, 2055-2075, DOI: 10.1080/13658816.2018.1476693 +} +\author{ +Wenbo Lv \email{lyu.geosocial@gmail.com} +} diff --git a/man/psmd_pseudop.Rd b/man/psmd_pseudop.Rd new file mode 100644 index 00000000..3980d38d --- /dev/null +++ b/man/psmd_pseudop.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pseudop.R +\name{psmd_pseudop} +\alias{psmd_pseudop} +\title{power of spatial and multilevel discretization determinant(PSMD) and the corresponding pseudo-p value} +\usage{ +psmd_pseudop( + formula, + data, + wt = NULL, + locations = NULL, + discnum = NULL, + discmethod = NULL, + cores = 6, + seed = 123456789, + permutations = 99, + ... +) +} +\arguments{ +\item{formula}{A formula of calculate power of spatial and multilevel discretization determinant \eqn{PSMDQ_s}.} + +\item{data}{A data.frame or tibble of observation data.} + +\item{wt}{(optional) The spatial weight matrix.When \code{wt} is not provided, must provide \code{locations}. +And \code{gdverse} will use \code{locations} columns to construct spatial weight use \code{inverse_distance_weight()}.} + +\item{locations}{(optional) The geospatial locations coordinate columns name which in \code{data}. +Useful and must provided when \code{wt} is not provided.} + +\item{discnum}{(optional) Number of multilevel discretization.Default will use \code{3:15}.} + +\item{discmethod}{(optional) The discretization methods. Default will use \code{quantile}. +When \code{discmethod} is \code{robust} use \code{robust_disc()}, others use \code{st_unidisc()}.Now only support +one \code{discmethod} at one time.} + +\item{cores}{(optional) A positive integer(default is 6). 
If cores > 1, use parallel computation.} + +\item{seed}{(optional) Random seed number, default is \code{123456789}.} + +\item{permutations}{(optional) The number of permutations for the PSD computation. Default is \code{99}.} + +\item{...}{(optional) Other arguments passed to \code{st_unidisc()} or \code{robust_disc()}.} +} +\value{ +A list of power of spatial and multilevel discretization determinant and the corresponding pseudo-p value. +} +\description{ +Function for calculate power of spatial and multilevel discretization determinant and the corresponding pseudo-p value. +} +\details{ +The power of spatial and multilevel discretization determinant formula is +\eqn{PSMDQ_s = MEAN(Q_s)} +} +\examples{ +\dontrun{ +library(sf) +usfi = read_sf(system.file('extdata/USFI_Xian.gpkg',package = 'gdverse')) |> + dplyr::select(dplyr::all_of(c("NDVI","BH","SUHI"))) +coord = usfi |> + st_centroid() |> + st_coordinates() +usfi = usfi |> + dplyr::bind_cols(coord) |> + st_drop_geometry() +tictoc::tic() +psmd_pseudop('SUHI ~ BH',data = dplyr::select(usfi,SUHI,BH,X,Y), + locations = c('X','Y'),cores = 6) +tictoc::toc() +} + +} +\references{ +Xuezhi Cang & Wei Luo (2018) Spatial association detector (SPADE),International +Journal of Geographical Information Science, 32:10, 2055-2075, DOI: 10.1080/13658816.2018.1476693 +} +\author{ +Wenbo Lv \email{lyu.geosocial@gmail.com} +} diff --git a/man/shuffle_vector.Rd b/man/shuffle_vector.Rd new file mode 100644 index 00000000..323c3d42 --- /dev/null +++ b/man/shuffle_vector.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spEcula_helper.R +\name{shuffle_vector} +\alias{shuffle_vector} +\title{randomly shuffling vector} +\usage{ +shuffle_vector(x, shuffle_rate, seed = 123456789) +} +\arguments{ +\item{x}{A vector.} + +\item{shuffle_rate}{The shuffling rate.} + +\item{seed}{(optional) Random seed number. Default is \code{123456789}.} +} +\value{ +A shuffled vector. 
+} +\description{ +randomly shuffling vector +}