diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index aa92c1b..496f918 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -1,7 +1,10 @@ C:/Users/trakradmin/OneDrive - University of Leeds/Documents/ITS/projects/IPSOS/_______________latest/dts/banes/_techNote_IPSOSDefra_DTTrend_BANES_working_04.Rmd="EF13DF93" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/ITS/projects/Leeds_CPRPC/_working/climate_rough_01.Rmd="59DA471D" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/ITS/projects/NERC_TRANSISTION/events/Clean Air Networks Conference/naei estimate.Rmd="1BB48545" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/_isolateContribution&breakPointAnalysis_KR_20230824.R="F18B98A3" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/_paper_01_IntroToRespeciate/MS Access Versions/speciate_5.2_0/test.R="FCE2E494" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/_slides_02_CRC/r code 2/_marylebone_CRC_Slides_04.Rmd="5042A905" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/_slides_02_CRC/r code/_marylebone_CRC_Slides_04.Rmd="25A452C7" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/marylebone03/_marylebone_analysis_pls_01.Rmd="F2B723A3" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/marylebone03/_marylebone_initial_observations_01.Rmd="E72195E5" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/_projects/marylebone03/_marylebone_metals_03.Rmd="D2C38DFE" @@ -15,22 +18,23 @@ C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/NEWS.md="F6ED8BF4" C:/Users/trakradmin/OneDrive - University of 
Leeds/Documents/pkg/respeciate/test/respeciate/R/respeciate-package.R="A43C9569" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/respeciate.generics.R="54ECE8F1" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.R="58F2A9BE" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.average.R="0A1E36E4" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.cluster.R="49C0F861" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.cor.R="3F1DA8E5" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.info.R="D0E7AC03" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.match.R="4326C1C3" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.pad.R="4962C940" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.plot.R="561F2EAC" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.pls.R="EACFE9A4" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.rescale.R="D668C5B2" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sp.reshape.R="CC655C60" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/speciate.R="6C6E673A" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/spq.R="3AD9ACC8" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/spx.R="CA18044A" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.R="787EA0C5" +C:/Users/trakradmin/OneDrive - University of 
Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.average.R="67ED42C3" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.build.R="5A264727" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.cluster.R="B197B439" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.cor.R="DE099ED6" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.info.R="FD1BAD48" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.match.R="7AE83929" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.pad.R="FEC8C57D" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.plot.R="80B907E9" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.pls.R="430E71B2" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.pls.plot.R="07565C15" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.q.R="2721C15F" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.rescale.R="2C292C00" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.reshape.R="94C8EF32" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/rsp.x.R="4DA91187" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/sysdata.R="82103C52" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/R/xxx.R="3415FF44" C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/README.Rmd="887EDA27" C:/Users/trakradmin/OneDrive - University of 
Leeds/Documents/pkg/respeciate/test/respeciate/README.md="D46A00DB" -C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/man/spec.Rd="8472593F" +C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/respeciate/man/respeciate.generics.Rd="2897F12C" diff --git a/DESCRIPTION b/DESCRIPTION index 8c3d69a..a737222 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: respeciate Title: Speciation profiles for gases and aerosols -Version: 0.2.7 -Date: 2024-02-17 -Description: Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for +Version: 0.3.0 +Date: 2024-05-17 +Description: Access to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for gases and particles. More details in Simon et al (2010) . Type: Package Authors@R: c( person(given = "Sergio", family = "Ibarra-Espinosa", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index f7507c6..b66ae30 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,12 @@ # Generated by roxygen2: do not edit by hand +S3method(as.respeciate,default) S3method(plot,respeciate) +S3method(plot,rsp_pls) S3method(print,respeciate) S3method(print,rsp_pls) S3method(summary,respeciate) -export(find_code) +export(as.respeciate) export(pls_fit_species) export(pls_plot) export(pls_plot_profile) @@ -13,39 +15,39 @@ export(pls_rebuild) export(pls_refit_species) export(pls_report) export(pls_test) -export(sp_average_profile) -export(sp_build_rsp_x) -export(sp_dcast) -export(sp_dcast_profile) -export(sp_dcast_species) -export(sp_find_profile) -export(sp_find_species) -export(sp_info) -export(sp_match_profile) -export(sp_melt_wide) -export(sp_pad) -export(sp_plot_profile) -export(sp_plot_species) -export(sp_pls_profile) -export(sp_profile) -export(sp_profile_distance) -export(sp_profile_info) -export(sp_rescale) -export(sp_rescale_profile) -export(sp_rescale_species) -export(sp_species_cor) -export(sp_species_info) -export(spec) 
-export(spq_gas) -export(spq_other) -export(spq_pm) -export(spq_pm.ae6) -export(spq_pm.ae8) -export(spq_pm.cr1) -export(spq_pm.simplified) -export(spx_btex) -export(spx_copy) -export(spx_n_alkane) +export(rsp) +export(rsp_average_profile) +export(rsp_build_x) +export(rsp_cor_species) +export(rsp_dcast) +export(rsp_dcast_profile) +export(rsp_dcast_species) +export(rsp_distance_profile) +export(rsp_find_profile) +export(rsp_find_species) +export(rsp_info) +export(rsp_match_profile) +export(rsp_melt_wide) +export(rsp_pad) +export(rsp_plot_profile) +export(rsp_plot_species) +export(rsp_pls_profile) +export(rsp_profile) +export(rsp_profile_info) +export(rsp_q_gas) +export(rsp_q_other) +export(rsp_q_pm) +export(rsp_q_pm.ae6) +export(rsp_q_pm.ae8) +export(rsp_q_pm.cr1) +export(rsp_q_pm.simplified) +export(rsp_rescale) +export(rsp_rescale_profile) +export(rsp_rescale_species) +export(rsp_species_info) +export(rsp_x_btex) +export(rsp_x_copy) +export(rsp_x_nalkane) importFrom(data.table,":=") importFrom(grDevices,as.graphicsAnnot) importFrom(grDevices,cm.colors) @@ -92,6 +94,7 @@ importFrom(stats,formula) importFrom(stats,hclust) importFrom(stats,heatmap) importFrom(stats,lm) +importFrom(stats,na.omit) importFrom(stats,nls) importFrom(stats,nls.control) importFrom(stats,predict) diff --git a/NEWS.md b/NEWS.md index fcd9fba..6a48e0b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,26 @@ -# Version 0.0 - Release Notes +# Release Notes Version 0.3 + +* [0.3.0] + * released 2024-05-17 + * code and documentation refresh... 
+ * added as.respeciate method; replacing related unexported code + * sp_profile, sp_build_rsp_x updates; both now use as.respeciate + * sp_profile to rsp_profile; sp_dcast/melt to rsp_dcast/melt; both as + part of object class rebuild + * sp_pad to rsp_pad; sp_rescale to rsp_rescale; class rebuild + * sp_match_profile to rsp_match_profile; class rebuild + * sp_cor to rsp_cor; sp_distance to rsp_dist; class rebuild + * rsp_plots now track factors + * pls_plots updated for object class + +# Release Notes Version 0.2 * [0.2.7] * released 2024-02-17 * sp_plot_species update; species order forced, added reset.x * sp_match_profile update; fit methods for pd and sid (Belis) * sp_match_profile update; added sid variations - * pls_plot patch; forcing profile order + * pls_plot patch; forcing profile order 1 * [0.2.6] * released 2023-12-01 @@ -68,6 +83,8 @@ * updated date and version * The new code is in R:speciate.0.2.r +# Release Notes Version 0.1 + * [0.1.0] * released 2020-12-20 * Created respeciate diff --git a/R/respeciate.generics.R b/R/respeciate.generics.R index ba83e0f..719b654 100644 --- a/R/respeciate.generics.R +++ b/R/respeciate.generics.R @@ -3,10 +3,14 @@ #' @description \code{respeciate} object classes and generic functions. ######################## -#might move all the text to top +#might move all the @description to top # hard to keep style consistent when docs are in between # multiple functions +#' @description When supplied a \code{data.frame} or similar, +#' \code{\link{as.respeciate}} attempts to coerce it into a +#' \code{respeciate} object. + #' @description When supplied a \code{respeciate} #' object or similar, \code{\link{print}} manages its appearance. #' @description When supplied a \code{respeciate} @@ -18,7 +22,7 @@ #' @param n when plotting or printing a multi-profile object, the #' maximum number of profiles to report. #' @param ... 
any extra arguments, mostly ignored except by -#' \code{plot} which passes them to \code{\link{sp_plot_profile}}. +#' \code{plot} which passes them to \code{\link{rsp_plot_profile}}. #' @param object like \code{x} but for \code{summary}. #' @note \code{respeciate} objects revert to #' \code{data.frame}s when not doing anything @@ -29,6 +33,75 @@ #' capacity... +################################## +# using +################################## + +# TO DO... + + +################################# +# as.respeciate +################################# + + +# notes +################################## + +# currently only allows for data.frames and objects that can be converted +# to data.frames using as.data.frame... + +# might also want to add setAs ???? + +#' @rdname respeciate.generics +#' @export + +as.respeciate <- function(x, ...) +{ + if (is.null(x)) + return(as.respeciate(list())) + UseMethod("as.respeciate") +} + +#' @rdname respeciate.generics +#' @method as.respeciate default +#' @export + +as.respeciate.default <- function(x, ...){ + + #setup + .xargs <- list(...) + + #try to make data.frame + .try <- try(as.data.frame(x), silent=TRUE) + if(inherits(.try, "try-error")){ #inherits() is the reliable try-error test + stop("as.respeciate> x needs to be data.frame or similar...", + sep="", call. 
= FALSE) + } + + #test structure + if(!"test.rsp" %in% names(.xargs) || .xargs$test.rsp){ + .test <- c("PROFILE_NAME", "PROFILE_CODE", "SPECIES_NAME", "SPECIES_ID", + ".value", "WEIGHT_PERCENT") + .test <- .test[!.test %in% names(.try)] + if(length(.test)>0){ + stop("as.respeciate> bad data structure, expected column(s) missing/unassigned:\n", + paste(.test, sep="", collapse = ", "), "\n", sep="", call.=FALSE) + } + if(any(is.na(.try$SPECIES_ID)) | any(is.na(.try$SPECIES_NAME))){ #NB: column is SPECIES_NAME (SPECIES_NAMES does not exist, so was never checked) + warning("as.respeciate> suspect species data, values missing:\n", + "(respeciate needs valid species entries)\n", + sep="", call.=FALSE) + } + } + + #output + class(.try) <- unique(c("respeciate", class(.try))) + .try +} + + + #notes ################################## @@ -38,29 +111,161 @@ # with different print outputs? # only plot for class plot, etc??? + #' @rdname respeciate.generics #' @method print respeciate #' @export -print.respeciate <- function(x, n = NULL, ...){ - test <- rsp_test_respeciate(x, level = 2, silent = TRUE) - if(test == "respeciate.profile.ref"){ - if(is.null(n)){ - n <- 100 + +print.respeciate <- + function(x, n=6, ...){ + + ################################ + #new general respeciate print method + .tmp <- getOption("width") + .x <- x + + #species info + if(class(.x)[1] == "rsp_si"){ + .y <- unique(.x$SPECIES_ID) + report <- paste("respeciate species list: ", + length(.y), "\n", + "[NO PROFILES]", "\n", sep="") + if(length(.y)>0){ + yy <- if(length(.y)>n) {.y[1:n]} else {.y} + for(i in yy){ + .m1 <- paste(" (", "ID ", i, ") ", + subset(.x, SPECIES_ID == i)$SPECIES_NAME[1], + "\n", sep="") + if(nchar(.m1)>.tmp){ + .m1 <- paste(substring(.m1, 1, .tmp-3), "...\n", sep="") #sep="" keeps the clipped line within console width + } + report <- paste(report, .m1, sep="") + } + } } - return(rsp_print_respeciate_profile(x=x, n=n, ...)) - } - if(test == "respeciate.species.ref"){ - if(is.null(n)){ - n <- 10 + + #profile info + if(class(.x)[1] == "rsp_pi"){ + .y <- unique(.x$PROFILE_CODE) + report <- paste("respeciate profile list: ", 
+ length(.y), "\n", + "[NO SPECIES]", "\n", sep="") + if(length(.y)>0){ + yy <- if(length(.y)>n) {.y[1:n]} else {.y} + for(i in yy){ + .m1 <- paste(" (", "CODE ", i, ") ", + subset(.x, PROFILE_CODE == i)$PROFILE_NAME[1], + "\n", sep="") + if(nchar(.m1)>.tmp){ + .m1 <- paste(substring(.m1, 1, .tmp-3), "...\n", sep="") + } + report <- paste(report, .m1, sep="") + } + } } - return(rsp_print_respeciate_species(x=x, n=n, ...)) - } - if(is.null(n)){ - n <- 10 + + #################################### + #to do + #################################### + #handling for wide frames + # needs testing + + + rsp.rep <- "respeciate" + if(class(.x)[1] == "rsp_sw"){ + rsp.rep <- paste(rsp.rep, " (wide/species)", sep="") + .x <- rsp_melt_wide(.x, pad=FALSE, drop.nas = TRUE) + .x$SPECIES_ID <- .x$SPECIES_NAME + } + if(class(.x)[1] == "rsp_pw"){ + rsp.rep <- paste(rsp.rep, " (wide/profile)", sep="") + .x <- rsp_melt_wide(.x, pad=FALSE, drop.nas = TRUE) + #.x$PROFILE_NAME <- .x$PROFILE_CODE + } + if(class(.x)[1] == "rsp_x"){ + rsp.rep <- gsub("respeciate", "respeciate-like x", rsp.rep) + class(.x) <- class(.x)[class(.x) != "rsp_x"] + } + + #standard respeciate + if(!class(.x)[1] %in% c("rsp_si", "rsp_pi")){ #also the fallback, so .y and report are always defined below + .y <- unique(.x$PROFILE_CODE) + report <- paste(rsp.rep, ": count ", + length(.y), "\n", sep="") + if(length(.y)>0){ + yy <- if(length(.y)>n) {.y[1:n]} else {.y} + for(i in yy){ + if("PROFILE_NAME" %in% names(.x)){ + i2 <- .x$PROFILE_NAME[.x$PROFILE_CODE==i][1] + } else { + i2 <- "[unknown]" + } + if("SPECIES_ID" %in% names(.x)){ + .spe <- length(unique(.x$SPECIES_ID[.x$PROFILE_CODE==i])) + } else { + .spe <- "0!" + } + .m1 <- paste(" ", i, " (", .spe, " species) ", + i2, "\n", sep="") + if(nchar(.m1)>.tmp){ + .m1 <- paste(substring(.m1, 1, .tmp-3), "...\n", sep="") + } + report <- paste(report, .m1, sep="") + } + } + } + + #cat output + if(length(.y)<1){ + cat(paste(report, + "empty (or bad?) respeciate object\n", + sep="")) + } else { + if(length(.y)>n){ + #rather no showing last...??? 
+ report <- paste(report, + " > showing ", n, " of ", length(.y), + "\n", sep="") #close the footer line + } + cat(report) + } + + #return x (not .x) + return(invisible(x)) } - rsp_print_respeciate(x=x, n=n, ...) -} + + + + + + + +########################## +########################## +## DROP THIS ??? +########################## +########################## + +#rsp_print.respeciate.old <- function(x, n = NULL, ...){ +# test <- .rsp_test_respeciate(x, level = 2, silent = TRUE) +# if(test == "respeciate.profile.ref"){ +# if(is.null(n)){ +# n <- 100 +# } +# return(.rsp_print_respeciate_profile(x=x, n=n, ...)) +# } +# if(test == "respeciate.species.ref"){ +# if(is.null(n)){ +# n <- 10 +# } +# return(.rsp_print_respeciate_species(x=x, n=n, ...)) +# } +# if(is.null(n)){ +# n <- 10 +# } +# .rsp_print_respeciate(x=x, n=n, ...) +#} ## rsp_print functions unexported ## further down @@ -97,7 +302,7 @@ print.rsp_pls <- function(x, n = NULL, ...){ temp <- unlist(lapply(x, function(x) !is.null(x))) temp <- length(temp[temp]) report <- paste(report, "\n list of ", length(x), " profile models", - "\n (", temp, " good)\n", sep="") + "\n (", temp, " fitted)\n", sep="") } cat(report) } @@ -128,87 +333,110 @@ print.rsp_pls <- function(x, n = NULL, ...){ #test is now set up to use data.table -#this is now sp_plot_profile +#this is now rsp_plot_profile plot.respeciate <- function(x, ...){ - sp_plot_profile(x, ...) + rsp_plot_profile(x, ...) } +#' @rdname respeciate.generics +#' @method plot rsp_pls +#' @export + +########################## +#notes +########################## +#all pls_plots are currently being redrafted +# (finish that then rethink this) + +plot.rsp_pls <- function(x, ...){ + pls_plot(x, ...) +} + + + + ######################### #to do ######################### #check below and then remove??? -rsp_plot.respeciate.old <- - function(x, n=NULL, id=NULL, order=TRUE, ...){ +########################## +########################## +## DROP THIS ??? 
+########################## +########################## + +#rsp_plot.respeciate.old <- +# function(x, n=NULL, id=NULL, order=TRUE, ...){ #add .value if not there ## don't think .value works - x <- rsp_tidy_profile(x) +# x <- .rsp_tidy_profile(x) ##test object type - test <- rsp_test_respeciate(x, level=2, silent=TRUE) - if(test != "respeciate"){ - if(test %in% c("respeciate.profile.ref", "respeciate.species.ref")){ - stop("No plot method for respeciate.reference files.") - } else { - stop("suspect respeciate object!") - } +# test <- rsp_test_respeciate(x, level=2, silent=TRUE) +# if(test != "respeciate"){ +# if(test %in% c("respeciate.profile.ref", "respeciate.species.ref")){ +# stop("No plot method for respeciate.reference files.") +# } else { +# stop("suspect respeciate object!") +# } #don't stop - respeciate profile - } +# } ##test something to plot - if(nrow(x)==0){ +# if(nrow(x)==0){ ###################### #think about this ###################### #maybe stop() instead??? #stop("empty respeciate object?") - return(invisible(NULL)) - } +# return(invisible(NULL)) +# } #hold extra args # passing to plot - .xargs <- list(...) +# .xargs <- list(...) #test number of profiles #and subset x, etc... 
- test <- unique(x$PROFILE_CODE) - if(is.null(n) & is.null(id)){ - id <- 1:length(test) - } else { - if(!is.null(n)){ - id <- 1:n - } - } - test <- test[id] - x <- x[x$PROFILE_CODE %in% test,] +# test <- unique(x$PROFILE_CODE) +# if(is.null(n) & is.null(id)){ +# id <- 1:length(test) +# } else { +# if(!is.null(n)){ +# id <- 1:n +# } +# } +# test <- test[id] +# x <- x[x$PROFILE_CODE %in% test,] #above will die if n-th profile not there - if(length(n)>6){ - warning(paste("\n\t", length(test), - " profiles (might be too many; suggest 6 or less...)", - "\n", sep="")) - } +# if(length(n)>6){ +# warning(paste("\n\t", length(test), +# " profiles (might be too many; suggest 6 or less...)", +# "\n", sep="")) +# } - x <- rsp_test_profile(x) +# x <- rsp_test_profile(x) - if(any(x$.n>1)){ - warning(paste("\n\t", - " found duplicate species in profiles (merged and averaged...)", - "\n", sep="")) - } - x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME) +# if(any(x$.n>1)){ +# warning(paste("\n\t", +# " found duplicate species in profiles (merged and averaged...)", +# "\n", sep="")) +# } +# x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME) #################################### #issue profile names are not always unique #################################### - test <- x - test$SPECIES_ID <- ".default" - test <- rsp_test_profile(test) +# test <- x +# test$SPECIES_ID <- ".default" +# test <- rsp_test_profile(test) ################### #rep_test #can now replace this with data.table version @@ -216,14 +444,14 @@ rsp_plot.respeciate.old <- ################### #does this need a warning? - if(length(unique(test$PROFILE_NAME)) 6 warning not appearing !!! -#option to have col as a function ??? - -#decide what to do about stacking -#log / bad.log??? - -#say no to stack logs! - -#would like it to handle logs force origin to 0 for standard -# and minimum for logs ??? - -#strip label font size??? - -#key? to reorder the auto.key test and rectangles??? 
-# key=list(space="right",adj=0,title="Legends", -# points=list(pch=1, -# col=trellis.par.get("superpose.symbol")$col[1:length(labels)]), -# text=list(labels)) - -#plot types??? - -# - -#test -#my <- "C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\test\\uk.metals.aurn.2b.rds" -#my <- sp_build_rsp_x(readRDS(my)) -#rsp_plot(my) - - -######################### -#next -########################## - -#now very messy... -#what can we rationalise??? -#profile name shortening -#profile name to code option??? -#species name to species id option??? - -rsp_plot <- - function(x, id, order=TRUE, - log=FALSE, ...){ - - #setup - ################## - #add .value if not there - x <- rsp_tidy_profile(x) - #others refs - .x.args <- list(...) - .sp.ord <- unique(x$SPECIES_ID) - .sp.pro <- unique(x$PROFILE_NAME) - #n/profile handling - profile <- if (missing(id)) { - profile <- .sp.pro - } else { - id - } - if (is.numeric(profile)) { - if (all(profile == -1)) { - profile <- .sp.pro - } - else { - profile <- .sp.pro[profile] - } - } - if (!any(profile %in% .sp.pro) | any(is.na(profile))) { - stop("RSP> unknown profile(s) or missing ids, please check", call. = FALSE) - } - - if(length(profile)>8 & missing(id)){ - warning("RSP> ", length(profile), " profiles... ", - "plot foreshorten to 8 to reduce cluttering", - "\n\t (maybe use id to force larger range if sure)", - sep="", call.=FALSE) - profile <- profile[1:8] - } - x <- x[x$PROFILE_NAME %in% profile,] - - ##test object type - test <- rsp_test_respeciate(x, level=2, silent=TRUE) - if(test != "respeciate"){ - if(test %in% c("respeciate.profile.ref", "respeciate.species.ref")){ - stop("RSP> No plot method for respeciate.reference files.", - call. = FALSE) - } else { - stop("RSP> suspect respeciate object!", - call. 
= FALSE) - } - #don't stop - respeciate profile - } - - ##test something to plot - if(nrow(x)==0){ - ###################### - #think about this - ###################### - #maybe stop() instead??? - #stop("empty respeciate object?") - #maybe warning() aw well?? - return(invisible(NULL)) - } - - x <- rsp_test_profile(x) - - if(any(x$.n>1)){ - warning(paste("RSP> found duplicate species in profiles (merged and averaged...)", - sep=""), call.=FALSE) - } - x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME) +# do.call(barplot, .xargs) - #################################### - #issue profile names are not always unique - #################################### - test <- x - test$SPECIES_ID <- ".default" - test <- rsp_test_profile(test) - ################### - #rep_test - #can now replace this with data.table version - #BUT check naming conventions for .n - ################### - - #does this need a warning? - if(length(unique(test$PROFILE_NAME)) found profiles with common names (making unique...)", - sep=""), call. = FALSE) - test$PROFILE_NAME <- make.unique(test$PROFILE_NAME) - x <- x[names(x) != "PROFILE_NAME"] - x <- merge(x, test[c("PROFILE_NAME", "PROFILE_CODE")], by="PROFILE_CODE") - } +# } - #x$PROFILE_NAME <- make.unique(x$PROFILE_NAME) - #order largest to smallest - ############################# - #like to also be able to order by molecular weight - ############################## - if(order){ - ################################ - #bit of a cheat... 
- ################################ - test <- x - test$PROFILE_CODE <- ".default" - test <- rsp_test_profile(test) - #previous barplot had bedside - if("stack" %in% names(.x.args) && .x.args$stack){ - test <- test[order(test$.total, decreasing = TRUE),] - xx <- unique(test$SPECIES_NAME) - } else { - test <- x[order(x$WEIGHT_PERCENT, decreasing = TRUE),] - xx <- unique(test$SPECIES_NAME) - } - } else { - xx <- unique(x$SPECIES_NAME) - } - x <- x[c("WEIGHT_PERCENT", "PROFILE_NAME", "SPECIES_NAME")] - - x$SPECIES_NAME <- factor(x$SPECIES_NAME, - levels = xx) - - ################## - #profile bar chart - ################## - p1.ls <- list(x= WEIGHT_PERCENT~SPECIES_NAME, - data=x, ylab="Profile Loading", xlab="", - #NB: prepanel seemed to break ylim when stacking - panel = function(x, y, origin, ylim, ...){ - rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), - panel.grid, ...) - if(missing(origin)){ - origin <- if(min(y, na.rm=TRUE) < 0 ) { - min(y, na.rm=TRUE) - 0.02 - } else { - 0 - } - } - panel.barchart(x=x, y=y, origin=origin, ylim=ylim, ...) - }, - between=list(y=.2), - scales=list(x=list(rot=90, - cex=0.7, - alternating=1), - y=list(rot=c(0,90), - cex=0.7, - alternating=3, - relation="free")) - ) - #, - #auto.key=list(space="right", columns = 1, - # cex=0.7, - # points=FALSE, - # rectangles=TRUE)) - ################# - #this may need refining... - - ##################### - #this is involved... - - if("col" %in% names(.x.args)){ - if(is.function(.x.args$col)){ - .x.args$col <- .x.args$col(length(profile)) - } - } - - if(length(profile)>1){ - #panel or group profiles? 
- if("panel.profiles" %in% names(.x.args)){ - p1.ls$x <- WEIGHT_PERCENT~SPECIES_NAME | PROFILE_NAME - } else { - p1.ls$groups <- x$PROFILE_NAME - if(!"col" %in% names(p1.ls)){ - p1.ls$col <- rep(trellis.par.get("superpose.polygon")$col, - length.out=length(profile)) - } - } - } - - if(log){ - if("stack" %in% names(.x.args) && .x.args$stack){ - stop("RSP> sorry currently don't stack logs...", - call. = FALSE) - } - #previous - p1.ls$scales$y$log <- 10 - p1.ls$yscale.components <- rsp_yscale.component.log10 - } - p1.ls <- modifyList(p1.ls, .x.args) - if("groups" %in% names(p1.ls) & length(profile)>1){ - #add key... if auto.key not there - .tmp <- if("col" %in% names(p1.ls)){ - rep(p1.ls$col, length.out = length(profile)) - } else { - rep(trellis.par.get("superpose.polygon")$col, - length.out=length(profile)) - } - p1.ls$key <- list(space="right", - #title="Legends", - rectangles=list(col=.tmp), - text = list(profile, cex=0.7)) - } - if("key" %in% names(.x.args)){ - p1.ls$key <- modifyList(p1.ls$key, .x.args$key) - } - if("col" %in% names(p1.ls)){ - p1.ls$par.settings = list(superpose.polygon = list(col = p1.ls$col), - superpose.symbol = list(fill = p1.ls$col)) - } - p1 <- do.call(barchart, p1.ls) - return(p1) - } ################################## #summary @@ -665,11 +634,275 @@ summary.respeciate <- +################################ +# not sure about this +################################ + +############################## +#plot.respeciate using lattice +############################## + +#to do +##################### + +#layout ??? +#n > 6 warning not appearing !!! +#option to have col as a function ??? + +#decide what to do about stacking +#log / bad.log??? + +#say no to stack logs! + +#would like it to handle logs force origin to 0 for standard +# and minimum for logs ??? + +#strip label font size??? + +#key? to reorder the auto.key test and rectangles??? 
+# key=list(space="right",adj=0,title="Legends", +# points=list(pch=1, +# col=trellis.par.get("superpose.symbol")$col[1:length(labels)]), +# text=list(labels)) + +#plot types??? + +# + +#test +#my <- "C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\test\\uk.metals.aurn.2b.rds" +#my <- sp_build_rsp_x(readRDS(my)) +#rsp_plot(my) + + +######################### +#next +########################## + +#now very messy... +#what can we rationalise??? +#profile name shortening +#profile name to code option??? +#species name to species id option??? + +#.rsp_plot <- +# function(x, id, order=TRUE, +# log=FALSE, ...){ +# +# #setup +# ################## +# #add .value if not there +# x <- .rsp_tidy_profile(x) +# #others refs +# .x.args <- list(...) +# .sp.ord <- unique(x$SPECIES_ID) +# .sp.pro <- unique(x$PROFILE_NAME) +# #n/profile handling +# profile <- if (missing(id)) { +# profile <- .sp.pro +# } else { +# id +# } +# if (is.numeric(profile)) { +# if (all(profile == -1)) { +# profile <- .sp.pro +# } +# else { +# profile <- .sp.pro[profile] +# } +# } +# if (!any(profile %in% .sp.pro) | any(is.na(profile))) { +# stop("RSP> unknown profile(s) or missing ids, please check", call. = FALSE) +# } +# +# if(length(profile)>8 & missing(id)){ +# warning("RSP> ", length(profile), " profiles... ", +# "plot foreshorten to 8 to reduce cluttering", +# "\n\t (maybe use id to force larger range if sure)", +# sep="", call.=FALSE) +# profile <- profile[1:8] +# } +# x <- x[x$PROFILE_NAME %in% profile,] +# +# ##test object type +# test <- rsp_test_respeciate(x, level=2, silent=TRUE) +# if(test != "respeciate"){ +# if(test %in% c("respeciate.profile.ref", "respeciate.species.ref")){ +# stop("RSP> No plot method for respeciate.reference files.", +# call. = FALSE) +# } else { +# stop("RSP> suspect respeciate object!", +# call. 
= FALSE) +# } +# #don't stop - respeciate profile +# } +# +# ##test something to plot +# if(nrow(x)==0){ +# ###################### +# #think about this +# ###################### +# #maybe stop() instead??? +# #stop("empty respeciate object?") +# #maybe warning() aw well?? +# return(invisible(NULL)) +# } +# +# x <- rsp_test_profile(x) +# +# if(any(x$.n>1)){ +# warning(paste("RSP> found duplicate species in profiles (merged and averaged...)", +# sep=""), call.=FALSE) +# } +# x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME) +# +# #################################### +# #issue profile names are not always unique +# #################################### +# test <- x +# test$SPECIES_ID <- ".default" +# test <- rsp_test_profile(test) +# ################### +# #rep_test +# #can now replace this with data.table version +# #BUT check naming conventions for .n +# ################### +# +# #does this need a warning? +# if(length(unique(test$PROFILE_NAME)) found profiles with common names (making unique...)", +# sep=""), call. = FALSE) +# test$PROFILE_NAME <- make.unique(test$PROFILE_NAME) +# x <- x[names(x) != "PROFILE_NAME"] +# x <- merge(x, test[c("PROFILE_NAME", "PROFILE_CODE")], by="PROFILE_CODE") +# } +# +# +# #x$PROFILE_NAME <- make.unique(x$PROFILE_NAME) +# +# #order largest to smallest +# ############################# +# #like to also be able to order by molecular weight +# ############################## +# if(order){ +# ################################ +# #bit of a cheat... 
+# ################################ +# test <- x +# test$PROFILE_CODE <- ".default" +# test <- rsp_test_profile(test) +# #previous barplot had bedside +# if("stack" %in% names(.x.args) && .x.args$stack){ +# test <- test[order(test$.total, decreasing = TRUE),] +# xx <- unique(test$SPECIES_NAME) +# } else { +# test <- x[order(x$WEIGHT_PERCENT, decreasing = TRUE),] +# xx <- unique(test$SPECIES_NAME) +# } +# } else { +# xx <- unique(x$SPECIES_NAME) +# } +# x <- x[c("WEIGHT_PERCENT", "PROFILE_NAME", "SPECIES_NAME")] +# +# x$SPECIES_NAME <- factor(x$SPECIES_NAME, +# levels = xx) +# +# ################## +# #profile bar chart +# ################## +# p1.ls <- list(x= WEIGHT_PERCENT~SPECIES_NAME, +# data=x, ylab="Profile Loading", xlab="", +# #NB: prepanel seemed to break ylim when stacking +# panel = function(x, y, origin, ylim, ...){ +# rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), +# panel.grid, ...) +# if(missing(origin)){ +# origin <- if(min(y, na.rm=TRUE) < 0 ) { +# min(y, na.rm=TRUE) - 0.02 +# } else { +# 0 +# } +# } +# panel.barchart(x=x, y=y, origin=origin, ylim=ylim, ...) +# }, +# between=list(y=.2), +# scales=list(x=list(rot=90, +# cex=0.7, +# alternating=1), +# y=list(rot=c(0,90), +# cex=0.7, +# alternating=3, +# relation="free")) +# ) +# #, +# #auto.key=list(space="right", columns = 1, +# # cex=0.7, +# # points=FALSE, +# # rectangles=TRUE)) +# ################# +# #this may need refining... +# +# ##################### +# #this is involved... +# +# if("col" %in% names(.x.args)){ +# if(is.function(.x.args$col)){ +# .x.args$col <- .x.args$col(length(profile)) +# } +# } +# +# if(length(profile)>1){ +# #panel or group profiles? 
+# if("panel.profiles" %in% names(.x.args)){ +# p1.ls$x <- WEIGHT_PERCENT~SPECIES_NAME | PROFILE_NAME +# } else { +# p1.ls$groups <- x$PROFILE_NAME +# if(!"col" %in% names(p1.ls)){ +# p1.ls$col <- rep(trellis.par.get("superpose.polygon")$col, +# length.out=length(profile)) +# } +# } +# } +# +# if(log){ +# if("stack" %in% names(.x.args) && .x.args$stack){ +# stop("RSP> sorry currently don't stack logs...", +# call. = FALSE) +# } +# #previous +# p1.ls$scales$y$log <- 10 +# p1.ls$yscale.components <- rsp_yscale.component.log10 +# } +# p1.ls <- modifyList(p1.ls, .x.args) +# if("groups" %in% names(p1.ls) & length(profile)>1){ +# #add key... if auto.key not there +# .tmp <- if("col" %in% names(p1.ls)){ +# rep(p1.ls$col, length.out = length(profile)) +# } else { +# rep(trellis.par.get("superpose.polygon")$col, +# length.out=length(profile)) +# } +# p1.ls$key <- list(space="right", +# #title="Legends", +# rectangles=list(col=.tmp), +# text = list(profile, cex=0.7)) +# } +# if("key" %in% names(.x.args)){ +# p1.ls$key <- modifyList(p1.ls$key, .x.args$key) +# } +# if("col" %in% names(p1.ls)){ +# p1.ls$par.settings = list(superpose.polygon = list(col = p1.ls$col), +# superpose.symbol = list(fill = p1.ls$col)) +# } +# p1 <- do.call(barchart, p1.ls) +# return(p1) +# } + ################################# -#unexported code +#old code ################################# @@ -801,54 +1034,12 @@ summary.respeciate <- -################################### -#class builds -################################### - -#rsp_build_respeciate.spcs <- -# function(x, ...){ - #build - #add .value -# x <- rsp_tidy_profile(x) -# class(x) <- c("respeciate.spcs", "data.frame") -# x -# } -#rsp_build_respeciate.ref <- -# function(x, ...){ - #build -# class(x) <- c("respeciate.ref", "data.frame") -# x -# } - -rsp_build_respeciate <- - function(x, ...){ - #build - class(x) <- c("respeciate", "data.frame") - x - } -########################### -#split respeciate by profile -########################### - 
-#currently not exported -#quick code assumed CODE is unique to profile - -#need to test this - -#not sure we are using this any more -# i think rsp_test, then rsp_test.2 replaced -# and code in plot.respeciate.old ??? - -rsp_split_profile <- function(x){ - ref <- unique(x$PROFILE_CODE) - lapply(ref, function(y) x[x$PROFILE_CODE==y,]) -} ######################## @@ -875,106 +1066,6 @@ rsp_split_profile <- function(x){ # could make other respeciate print outputs # look like this? -rsp_print_respeciate <- - function(x, n=6, ...){ - #profile_code is (I think) only term unique to a profile - y <- unique(x$PROFILE_CODE) - report <- paste("respeciate profile(s): count ", - length(y), "\n", sep="") - if(length(y)==0){ - cat(paste(report, - "empty (or bad?) respeciate object\n", - sep="")) - return(invisible(x)) - } - .tmp <- getOption("width") - yy <- if(length(y)>n) {y[1:n]} else {y} - for(i in yy){ - if("PROFILE_NAME" %in% names(x)){ - i2 <- x$PROFILE_NAME[x$PROFILE_CODE==i][1] - } else { - i2 <- "[unknown]" - } - if("SPECIES_ID" %in% names(x)){ - .spe <- length(unique(x$SPECIES_ID[x$PROFILE_CODE==i])) - } else { - .spe <- "0!" - } - .msg <- paste(" ", i, " (", .spe, " species) ", - i2, "\n", sep="") - if(nchar(.msg)>.tmp){ - .msg <- paste(substring(.msg, 1, .tmp-3), "...\n") - } - report <- paste(report, .msg, sep="") - } - if(length(y)>n){ - report <- paste(report, " ... not showing last ", - length(y)-n, "\n", sep="") - } - cat(report, sep="") - invisible(x) - } - - -## #' @description When supplied a \code{respeciate.ref} -# #' object, \code{\link{print}} manages its appearance. -# #' @param x the \code{respeciate} or \code{respeciate.ref} -# #' object to be printed, plotted, etc. -# #' @rdname respeciate.generics -# #' @method print respeciate.ref -# #' @export -rsp_print_respeciate_profile <- - function(x, n = 100, ...){ - xx <- nrow(x) - wi <- getOption("width") - #################################### - #use of cat might need rethinking? 
- #################################### - cat("respeciate profile reference\n") - if(n>xx){ - n <- xx - } - if(xx>n){ - report <- c(x$PROFILE_CODE[1:n], "...") - comment <- paste(" profiles [showing first ", n, - "]\n", sep = "") - } else { - report <- x$PROFILE_CODE - comment <- " profiles\n" - } - if(xx>0) cat(report, fill=wi) - cat(" > ", xx, comment, sep="") - invisible(x) - } - - -# #' @rdname respeciate.generics -# #' @method print respeciate.spcs -# #' @export -rsp_print_respeciate_species <- - function(x, n = 10, ...){ - xx <- nrow(x) - wi <- getOption("width") - #################################### - #use of cat might need rethinking? - #################################### - cat("respeciate species reference\n") - if(n>xx){ - n <- xx - } - if(xx>n){ - report <- c(x$SPECIES_NAME[1:n], "...") - comment <- paste(" species [showing first ", n, - "]\n", sep="") - } else { - report <- x$SPECIES_NAME - comment <- " species\n" - } - if(xx>0) cat(report, fill=wi) - cat(" > ", xx, comment, sep="") - invisible(x) - } - @@ -995,122 +1086,129 @@ rsp_print_respeciate_species <- #now replacing previous plot.respeciate -plot.respeciate.old <- - function(x, n=NULL, order=TRUE, ..., - legend.text=NULL, - args.legend = NULL){ - - #test number of profiles - #and subset x, etc... 
- test <- unique(x$PROFILE_CODE) - if(is.null(n)) n <- 1:length(test) - test <- test[n] - x <- x[x$PROFILE_CODE %in% test,] - #above will die if n-th profile not there - if(length(n)>6){ - warning(paste("\n\t", length(test), - " profiles (might be too many; suggest 6 or less...)", - "\n", sep="")) - } - test.names <- make.unique(sapply(test, - function(y) subset(x, - PROFILE_CODE==y)$PROFILE_NAME[1])) - - #check anything left to work with - if(length(test)==0){ - stop("empty (or bad) respeciate object?") - } - #assuming multiple profiles - #build common data (could use dplyr) - x <- x[c("PROFILE_NAME", "PROFILE_CODE", - "SPECIES_NAME", "SPECIES_ID", "SPEC_MW", - "WEIGHT_PERCENT")] - x <- rsp_split_profile(x) - x <- suppressWarnings(Reduce(function(x, y) - merge(x=x, y=y, - by=c("SPECIES_ID", "SPECIES_NAME", - "SPEC_MW"), - all.x=T, all.y=T), x) - ) - #in case names not unique - names(x) <- make.names(names(x), unique=TRUE) - - #order largest to smallest - if(order){ - temp <- names(x)[grep("WEIGHT_PERCENT", names(x))] - temp <- apply(x[temp], 1, - function(y) sum(y, na.rm=TRUE)) - x <-x[rev(order(temp)),] - } - - #prepare plot - xx <- rsp_tidy_species_name(x$SPECIES_NAME) - x <- x[grep("WEIGHT_PERCENT", names(x))] - x[is.na(x)] <- 0 - ######################### - #above kills log but seems to be needed - #or we loose all records of one species if any are NA - b <- as.matrix(t(x)) - - #below now handled later - #if("beside" %in% names(list(...)) && - # list(...)$beside){ - # #need to replace this with something nicer - # temp <- rep(NA, length(xx) * length(n)) - # temp[(1:length(xx))*length(n)] <- xx - # xx <- temp - #} - - #plot legend handling - #could simplify this - if(is.null(legend.text)){ - legend.text <- test.names - } - if(is.null(args.legend)){ - args.legend <- list() - } - if(!"cex" %in% names(args.legend)){ - args.legend$cex <- 0.5 - } - if(!"x" %in% names(args.legend)){ - args.legend$x <- "topright" - } +########################## 
+########################## +## DROP THIS ??? +########################## +########################## - #need to do plot differently if horiz(ontal) - if("horiz" %in% names(list(...)) && - list(...)$horiz){ - #set up y annotation - ref <- max(nchar(xx), na.rm=TRUE) * 0.25 - if(ref>10) ref <- 10 #stop it getting silly with x names - op <- par(mar=c(2,ref,4,2)) - #plot standard - b <- barplot(b, yaxt="n", #space=0.5, - legend.text=legend.text, - args.legend =args.legend, - ...) - if(is.matrix(b)){ - b <- apply(b, 2, function(x) mean(x, na.rm=T)) - } - axis(2, at=b, labels=xx, las=2, tick=FALSE, cex.axis=0.5) - rm(op) - } else { - #set up x annotation - ref <- max(nchar(xx), na.rm=TRUE) * 0.25 - if(ref>10) ref <- 10 #stop it getting silly with x names - op <- par(mar=c(ref,4,4,2)) - #plot standard - b <- barplot(b, xaxt="n", #space=0.5, - legend.text=legend.text, - args.legend = args.legend, - ...) - if(is.matrix(b)){ - b <- apply(b, 2, function(x) mean(x, na.rm=T)) - } - axis(1, at=b, labels=xx, las=2, tick=FALSE, cex.axis=0.5) - rm(op) - } - } +#plot.respeciate.old <- +# function(x, n=NULL, order=TRUE, ..., +# legend.text=NULL, +# args.legend = NULL){ +# +# #test number of profiles +# #and subset x, etc... 
+# test <- unique(x$PROFILE_CODE) +# if(is.null(n)) n <- 1:length(test) +# test <- test[n] +# x <- x[x$PROFILE_CODE %in% test,] +# #above will die if n-th profile not there +# if(length(n)>6){ +# warning(paste("\n\t", length(test), +# " profiles (might be too many; suggest 6 or less...)", +# "\n", sep="")) +# } +# test.names <- make.unique(sapply(test, +# function(y) subset(x, +# PROFILE_CODE==y)$PROFILE_NAME[1])) +# +# #check anything left to work with +# if(length(test)==0){ +# stop("empty (or bad) respeciate object?") +# } +# +# #assuming multiple profiles +# #build common data (could use dplyr) +# x <- x[c("PROFILE_NAME", "PROFILE_CODE", +# "SPECIES_NAME", "SPECIES_ID", "SPEC_MW", +# "WEIGHT_PERCENT")] +# x <- rsp_split_profile(x) +# x <- suppressWarnings(Reduce(function(x, y) +# merge(x=x, y=y, +# by=c("SPECIES_ID", "SPECIES_NAME", +# "SPEC_MW"), +# all.x=T, all.y=T), x) +# ) +# #in case names not unique +# names(x) <- make.names(names(x), unique=TRUE) +# +# #order largest to smallest +# if(order){ +# temp <- names(x)[grep("WEIGHT_PERCENT", names(x))] +# temp <- apply(x[temp], 1, +# function(y) sum(y, na.rm=TRUE)) +# x <-x[rev(order(temp)),] +# } +# +# #prepare plot +# xx <- rsp_tidy_species_name(x$SPECIES_NAME) +# x <- x[grep("WEIGHT_PERCENT", names(x))] +# x[is.na(x)] <- 0 +# ######################### +# #above kills log but seems to be needed +# #or we loose all records of one species if any are NA +# b <- as.matrix(t(x)) +# +# #below now handled later +# #if("beside" %in% names(list(...)) && +# # list(...)$beside){ +# # #need to replace this with something nicer +# # temp <- rep(NA, length(xx) * length(n)) +# # temp[(1:length(xx))*length(n)] <- xx +# # xx <- temp +# #} +# +# #plot legend handling +# #could simplify this +# if(is.null(legend.text)){ +# legend.text <- test.names +# } +# if(is.null(args.legend)){ +# args.legend <- list() +# } +# if(!"cex" %in% names(args.legend)){ +# args.legend$cex <- 0.5 +# } +# if(!"x" %in% names(args.legend)){ +# 
args.legend$x <- "topright" +# } +# +# #need to do plot differently if horiz(ontal) +# if("horiz" %in% names(list(...)) && +# list(...)$horiz){ +# #set up y annotation +# ref <- max(nchar(xx), na.rm=TRUE) * 0.25 +# if(ref>10) ref <- 10 #stop it getting silly with x names +# op <- par(mar=c(2,ref,4,2)) +# #plot standard +# b <- barplot(b, yaxt="n", #space=0.5, +# legend.text=legend.text, +# args.legend =args.legend, +# ...) +# if(is.matrix(b)){ +# b <- apply(b, 2, function(x) mean(x, na.rm=T)) +# } +# axis(2, at=b, labels=xx, las=2, tick=FALSE, cex.axis=0.5) +# rm(op) +# } else { +# #set up x annotation +# ref <- max(nchar(xx), na.rm=TRUE) * 0.25 +# if(ref>10) ref <- 10 #stop it getting silly with x names +# op <- par(mar=c(ref,4,4,2)) +# #plot standard +# b <- barplot(b, xaxt="n", #space=0.5, +# legend.text=legend.text, +# args.legend = args.legend, +# ...) +# if(is.matrix(b)){ +# b <- apply(b, 2, function(x) mean(x, na.rm=T)) +# } +# axis(1, at=b, labels=xx, las=2, tick=FALSE, cex.axis=0.5) +# rm(op) +# } +# } diff --git a/R/rsp.R b/R/rsp.R new file mode 100644 index 0000000..2b910ef --- /dev/null +++ b/R/rsp.R @@ -0,0 +1,178 @@ +#' @name rsp +#' @title rsp_profile +#' @aliases rsp rsp_profile + + +#' @description Getting profile(s) from the R (re)SPECIATE archive + +#' @param ... The function assumes all inputs (except \code{include.refs}) +#' are \code{PROFILE_CODE}s (the unique descriptor the EPA assigns to all +#' profiles in SPECIATE) or sources of profile information and requests these +#' from the local (re)SPECIATE archive. Typically, simple +#' objects like character and numeric vectors, are assumed to be profile codes and +#' composite data-types like \code{respeciate} objects or \code{data.frame}, +#' are assumed to contain a named \code{PROFILE_CODE} column. All potential +#' profile codes are requested and unrecognized codes are ignored.
+#' @param include.refs logical, if profile reference information should be +#' included when extracting the requested profile(s) from the archive, default +#' \code{FALSE}. +#' @return \code{rsp_profile} or the short-hand \code{rsp} return an object of +#' \code{respeciate} class, a \code{data.frame} containing one or more profile +#' from the local (re)SPECIATE archive. +#' @note The option \code{include.refs} adds profile source reference +#' information to the returned \code{respeciate} data set. The default option +#' is to not include these because some profiles have several associated +#' references and including these replicates records, once per reference. +#' \code{respeciate} code is written to handle this but if you are developing +#' own methods or code and include references in any profile build you may be +#' biasing some analyses in favor of those multiple-reference profile unless +#' you check and account such cases. +#' @references +#' Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., +#' Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. +#' The development and uses of EPA SPECIATE database. +#' Atmospheric Pollution Research, 1(4), pp.196-206. +#' @examples \dontrun{ +#' x <- rsp_profile(8833, 8850) +#' plot(x)} + +#NOTES +####################### + +# 0.3. notes +# went from sp_profile to rsp_profile (and rsp) +# dropped code argument +# using as.respeciate in generics to build rsp object + + +#to think about +####################### + +#add functions to build or add respeciate-like data of own, +# e.g. x matrices for pls modelling + +# (build functions started as separate script, rsp.build.R) + +## rsp_import_profile to import a profile from an external source +## extension of above to import data from specific sources +## might be very code intensive..? + +## local function to pad data using database??? + +## (now importing via xxx.r) +## #' @import data.table + +# may need to set data.table specifically?? 
+# data.table::as.data.table, etc??
+
+#####################
+#to think about
+#####################
+# not sure but I think something in the main build:
+# (default; include.refs = FALSE)
+# PROFILES>>SPECIES>>SPECIES_PROPERTIES
+# (full build; include.refs = TRUE)
+# PROFILES>>SPECIES>>SPECIES_PROPERTIES>>PROFILE_REFERENCE>>REFERENCES
+# is replicating profiles.
+#
+
+#v 0.2
+# based on previous sp_profile but using data.table
+# (0.1 version currently unexported sp_profile.old)
+
+#' @rdname rsp
+#' @export
+
+rsp_profile <- function(..., include.refs=FALSE) {
+
+  # Collects candidate profile codes from everything supplied in '...',
+  # pulls the matching rows out of the sysdata tables and returns them
+  # via as.respeciate().
+  #
+  # inputs handled in '...':
+  #   data.frames (respeciate objects included) with a PROFILE_CODE column,
+  #   numeric vectors, character vectors;
+  #   anything else is dropped with a warning
+
+  # open question carried over from earlier versions:
+  #   '...' now stands in for the old 'code' argument; options may need
+  #   rethinking if any element of '...' is a data.frame
+
+  # reduce one '...' element to a character vector of codes, or NULL
+  .as_codes <- function(.src){
+    .has_code_col <- is.data.frame(.src) && "PROFILE_CODE" %in% names(.src)
+    if(.has_code_col){
+      .src <- unique(.src$PROFILE_CODE)
+    }
+    if(is.numeric(.src)){
+      .src <- as.character(.src)
+    }
+    if(is.character(.src)){
+      return(.src)
+    }
+    warning("RSP> unexpected 'PROFILE_CODE' source found and ignored",
+            call.=FALSE)
+    NULL
+  }
+  code <- do.call(c, lapply(list(...), .as_codes))
+
+  ################
+  #previous....
+  ################
+  #if(is.data.frame(code) && "PROFILE_CODE" %in% names(code)){
+  #  code <- unique(code$PROFILE_CODE)
+  #}
+  #if(is.numeric(code)) code <- as.character(code)
+  #if(!is.character(code)) {
+  #  stop("unexpected 'code' class",
+  #       call.=FALSE)
+  #}
+
+  # working copies of the archive tables, all as data.tables
+  .profiles   <- data.table::as.data.table(sysdata$PROFILES)
+  .species    <- data.table::as.data.table(sysdata$SPECIES)
+  .properties <- data.table::as.data.table(sysdata$SPECIES_PROPERTIES)
+  .prof_refs  <- data.table::as.data.table(sysdata$PROFILE_REFERENCE)
+  .refs       <- data.table::as.data.table(sysdata$REFERENCES)
+
+  # every join below keeps all rows of the left table, drops unmatched
+  # rows of the right table and allows one-to-many expansion
+  .left_join <- function(.lhs, .rhs, .key){
+    merge(.lhs, .rhs, by = .key, all.y=FALSE, all.x=TRUE,
+          allow.cartesian=TRUE)
+  }
+
+  # tolower on both sides: profile code matching is case-insensitive
+  # (author note: some of this might be replaced by sp_pad, if sp_pad stays)
+  dt <- .profiles[tolower(.profiles$PROFILE_CODE) %in% tolower(code),]
+  dt <- .left_join(dt, .species, "PROFILE_CODE")
+  dt <- .left_join(dt, .properties, "SPECIES_ID")
+  if(include.refs){
+    # reference tables only joined on request
+    dt <- .left_join(dt, .prof_refs, "PROFILE_CODE")
+    dt <- .left_join(dt, .refs, "REF_Code")
+  }
+  dt <- dt[order(dt$PROFILE_CODE, decreasing = FALSE),]
+
+  # back to data.frame; copy WEIGHT_PERCENT into .value unless a
+  # .value column is already there
+  out <- as.data.frame(dt)
+  .cols <- names(out)
+  if(("WEIGHT_PERCENT" %in% .cols) && !(".value" %in% .cols)){
+    out$.value <- out$WEIGHT_PERCENT
+  }
+
+  # note
+  ######################################
+  # as.respeciate replaces the earlier unexported rsp_build_respeciate(x);
+  # could do similar elsewhere if not used widely elsewhere ???
+
+  #output
+  as.respeciate(out, test.rsp=FALSE)
+}
+
+#' @rdname rsp
+#' @export
+
+# short-hand: forwards every argument unchanged to rsp_profile
+rsp <- function(...) rsp_profile(...)
+
+
+
+
+
+
+
diff --git a/R/sp.average.R b/R/rsp.average.R similarity index 78% rename from R/sp.average.R rename to R/rsp.average.R index 8e68d55..a14f159 100644 --- a/R/sp.average.R +++ b/R/rsp.average.R @@ -1,14 +1,14 @@ -#' @name sp.average -#' @title speciate data averaging functions -#' @aliases sp_average_profile +#' @name rsp.average +#' @title (re)SPECIATE data averaging functions +#' @aliases rsp_average_profile #' @description Functions to build composite (re)SPECIATE profiles -#' @description \code{sp_average_profile} generates an average composite +#' @description \code{rsp_average_profile} generates an average composite #' of a supplied multi-profile \code{respeciate} object. -#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +#' @param rsp A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) #' profiles. #' @param code required character, the unique profile code to assign to the #' average profile. @@ -16,9 +16,9 @@ #' profile. If not supplied, this defaults to a collapsed list of the codes #' of all the profiles averaged. #' @param method numeric, the averaging method to apply: Currently only 1 (default) -#' \code{mean(x)}. +#' \code{mean(rsp)}. #' @param ... additional arguments, currently ignored -#' @return \code{sp_average_profile} returns a single profile average +#' @return \code{rsp_average_profile} returns a single profile average #' version of the supplied \code{respeciate} profile. #' @note In development function; arguments and outputs likely to be subject to #' change. @@ -29,16 +29,13 @@ #NOTE -#' @rdname sp.average -#' @export -## #' @import data.table (in xxx.r) -# may need to set data.table specifically?? -# data.table::as.data.table, etc?? +##################### +#rsp_average_profile +##################### -###################### #average data -###################### +# multiple profiles to one mean averaged profile...
## in development @@ -66,18 +63,23 @@ #test ########################### -#aa <- sp_profile(sp_find_profile("ae8", by="profile_type")) -#sp_average_profile(aa) +#aa <- rsp_profile(sp_find_profile("ae8", by="profile_type")) +#rsp_average_profile(aa) +#' @rdname rsp.average +#' @export +## #' @import data.table (in xxx.r) +# may need to set data.table specifically?? +# data.table::as.data.table, etc?? -sp_average_profile <- function(x, code = NULL, name = NULL, method = 1, +rsp_average_profile <- function(rsp, code = NULL, name = NULL, method = 1, ...){ ################################# #check x is a respeciate object?? #check it has .value - x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(rsp) #save class to return as is.. # thinking about this @@ -144,12 +146,20 @@ sp_average_profile <- function(x, code = NULL, name = NULL, method = 1, +################################# +############################### +## unexported +############################### +################################## + ##################################### -#sp_species_calc +#rsp_species_calc ##################################### -sp_species_calc <- function(x, calc = NULL, +# unfinished + +rsp_species_calc <- function(x, calc = NULL, id = NULL, name = NULL, ...){ #x is an rsp object @@ -158,7 +168,7 @@ sp_species_calc <- function(x, calc = NULL, .temp <- x #test we can use this..? print(calc) - .temp <- sp_dcast_species(.temp) + .temp <- rsp_dcast_species(.temp) if(length(grep("=", calc)) > 0){ print("is equals") } else { diff --git a/R/rsp.build.R b/R/rsp.build.R new file mode 100644 index 0000000..b668d53 --- /dev/null +++ b/R/rsp.build.R @@ -0,0 +1,249 @@ +#' @name rsp.build +#' @title Building respeciate-like Objects +#' @aliases rsp_build_x + + +#' @description rsp function(s) to reconfigure data.frames (and similar +#' object classes) for use with data and functions in re(SPECIATE). + +#' @param x \code{data.frame} or similar (i.e. 
+#' something that can be coerced into a \code{data.frame} using +#' \code{as.data.frame}) to be converted into a \code{respeciate} object. +#' @param profile_name,profile_code (\code{character}) The name of the column +#' in \code{x} containing profile name and code records, respectively. If not +#' already named according to SPECIATE conventions, at least one of these will +#' need to be assigned. +#' @param species_name,species_id (\code{character}) The name of the column +#' in \code{x} containing species name and id records, respectively. If not +#' already named according to SPECIATE conventions, at least one of these will +#' need to be assigned. +#' @param value (\code{character}) The name of the column in \code{x} +#' containing measurement values. If not already named according to SPECIATE +#' conventions, this will need to be assigned. +#' @param ... (any other arguments) currently ignored. +#' @return \code{rsp_build}s attempt to build and return a (re)SPECIATE-like +#' object that can be compared with data from re(SPECIATE). +#' @note If you want to compare your data with profiles in the SPECIATE archive, +#' you need to use EPA SPECIATE conventions when assigning species names and +#' identifiers. Currently, we are working on options to improve on this (and +#' very happy to discuss if anyone has ideas), but current best suggestion is: +#' (1) identify the SPECIATE species code for each of the species in your data set, +#' and (2) assign these as \code{species_id} when \code{rsp_build}ing. The +#' function will then associate the \code{species_name} from SPECIATE species +#' records. + +#NOTES +####################### + +#to think about +####################### + +## sp_build_profile to make a profile locally +## needs profile_name, profile_code +## species_name, species_id +## weight_percent (and possibly .value) + +############################## +# rsp_build_x +############################## + +# notes +############################## + +# 0.3. 
notes
+# went from sp_build_rsp_x to rsp_build_x
+# using as.respeciate and adding rsp_x subclass
+
+# rsp_build_x currently converts x to data.frame (as.data.frame(x))
+# if tibble is loaded, this currently complicates things...
+
+# BUT might want to revisit this because it looked like:
+# the data structure was fine but
+# print.respeciate was having problems...
+
+# BUT might be other problems I did not spot
+
+# BUT be nice if c("respeciate", class("tibble")) could be used...
+# to retain the data type history
+# and drop back to tibble rather than data.frame....
+
+
+#' @rdname rsp.build
+#' @export
+
+rsp_build_x <-
+  function(x, profile_code, profile_name,
+           species_name, species_id,
+           value, ...){
+
+    # light build for a rsp_x data object
+    # might need spec_mwt
+
+    # summary:
+    #   x is coerced with as.data.frame(); the standard columns
+    #   (PROFILE_NAME, PROFILE_CODE, SPECIES_NAME, SPECIES_ID, .value,
+    #   WEIGHT_PERCENT) are filled in from the columns named in the call
+    #   or from each other; the result goes through as.respeciate() and
+    #   gets "rsp_x" prepended to its class.
+    #   stop()s if a column named in the call is not in x, or if no
+    #   value column can be found.
+
+    ###########################
+    # current build rules
+    ###########################
+
+    # must be a data.frame or something that can be converted
+    # using as.data.frame(x)
+
+    # profile and species columns must be character...
+
+    # profile_name: if not there, if sent in call use,
+    #               else if there use profile_code
+    # profile_code: if not there, if sent in call use,
+    #               else if there use profile_name
+    # species_name: if not there, if sent in call use,
+    #               else if there use species_id to look-up
+    #               if any missing, warn
+    #               (NOTE: no warning is issued by the look-up below)
+    # species_id:   if not there, if sent in call use,
+    #               else if there use species_name to look-up
+    #               if any missing, warn
+    #               (NOTE: no warning is issued by the look-up below)
+    # .value:       if not there, if sent in call use.
+    #               (NEW/TESTING) else if there use WEIGHT_PERCENT
+    # WEIGHT_PERCENT:if not there, if sent in call use
+    #               else if there use .value to look-up
+
+    # should error if any of these missing at end of build
+
+    # redundant?
+    # currently only using to turn warning off...
+    .x.args <- list(...)
+
+    #adding the as.data.frame because
+    # code is not planning nicely with Dennis' tibbles
+    # if tibble is loaded before respeciate...
+    x <- as.data.frame(x)
+
+    #rationalise this?...
+    # could they be else options when
+    # check for species and profile columns?
+    ################################
+    # notes
+    # profile and species columns all need to character
+    # user could supply any thing and previously
+    # only applying as.character when making something new...
+    # else may at start then when making something new...
+    # (at end did not work for species if building one of species_name
+    # and species_id from other...)
+    # also
+    # do values need to be as.numeric???
+    if("PROFILE_NAME" %in% names(x)){
+      x$PROFILE_NAME <- as.character(x$PROFILE_NAME)
+    }
+    if("PROFILE_CODE" %in% names(x)){
+      x$PROFILE_CODE <- as.character(x$PROFILE_CODE)
+    }
+    if("SPECIES_NAME" %in% names(x)){
+      x$SPECIES_NAME <- as.character(x$SPECIES_NAME)
+    }
+    if("SPECIES_ID" %in% names(x)){
+      x$SPECIES_ID <- as.character(x$SPECIES_ID)
+    }
+
+    #if not there and sent in call
+
+    #note:
+    #current making all BUT values, character class
+    # (&& rather than &: these are scalar tests inside if())
+    # (stop() pastes its arguments with no separator, so no sep needed)
+    if(!"PROFILE_NAME" %in% names(x) && (!missing(profile_name))){
+      if(!profile_name %in% names(x)){
+        stop("rsp_build> '", as.character(profile_name)[1],
+             "' not in 'x'...", call. = FALSE)
+      }
+      x$PROFILE_NAME <- as.character(x[, profile_name])
+    }
+    if(!"PROFILE_CODE" %in% names(x) && (!missing(profile_code))){
+      if(!profile_code %in% names(x)){
+        stop("rsp_build> '", as.character(profile_code)[1],
+             "' not in 'x'...", call. = FALSE)
+      }
+      x$PROFILE_CODE <- as.character(x[, profile_code])
+    }
+    if(!"SPECIES_NAME" %in% names(x) && (!missing(species_name))){
+      if(!species_name %in% names(x)){
+        stop("rsp_build> '", as.character(species_name)[1],
+             "' not in 'x'...", call. = FALSE)
+      }
+      x$SPECIES_NAME <- as.character(x[, species_name])
+    }
+    if(!"SPECIES_ID" %in% names(x) && (!missing(species_id))){
+      if(!species_id %in% names(x)){
+        stop("rsp_build> '", as.character(species_id)[1],
+             "' not in 'x'...", call. = FALSE)
+      }
+      x$SPECIES_ID <- as.character(x[, species_id])
+    }
+    if(!".value" %in% names(x)){
+      if(missing(value)){
+        # no 'value' in call: fall back on WEIGHT_PERCENT if it is there
+        if("WEIGHT_PERCENT" %in% names(x)){
+          x$.value <- x[, "WEIGHT_PERCENT"]
+        } else {
+          stop("rsp_build> 'value' not found for 'x'...",
+               call. = FALSE)
+        }
+      } else {
+        if(!value %in% names(x)){
+          stop("rsp_build> '", as.character(value)[1],
+               "' not in 'x'...", call. = FALSE)
+        }
+        # fix: this assignment used to sit after the if/else and so also
+        # ran when 'value' was missing, which errored (missing argument)
+        # straight after the WEIGHT_PERCENT fallback had succeeded
+        x$.value <- x[, value]
+      }
+    }
+    #################
+    #old
+    #################
+    #if(!".value" %in% names(x) & (!missing(value))){
+    #  if(!value %in% names(x)){
+    #    stop("sp_build> '", as.character(value)[1],
+    #         "' not in 'x'...", sep="", call. = FALSE)
+    #  }
+    #  x$.value <- x[, value]
+    #}
+
+    #if still not there try to assign using what is there
+
+    if("PROFILE_NAME" %in% names(x) && !"PROFILE_CODE" %in% names(x)){
+      x$PROFILE_CODE <- x$PROFILE_NAME
+    }
+    if("PROFILE_CODE" %in% names(x) && !"PROFILE_NAME" %in% names(x)){
+      x$PROFILE_NAME <- x$PROFILE_CODE
+    }
+    test <- c("SPECIES_NAME", "SPECIES_ID")[c("SPECIES_NAME", "SPECIES_ID")
+                                            %in% names(x)]
+    if(length(test)==1){
+      #one there, other as look-up
+      .tmp <- data.table::as.data.table(
+        sysdata$SPECIES_PROPERTIES[c("SPECIES_NAME", "SPECIES_ID")]
+      )
+      .tmp$SPECIES_NAME <- as.character(.tmp$SPECIES_NAME)
+      .tmp$SPECIES_ID <- as.character(.tmp$SPECIES_ID)
+      # 'by' made explicit: the only column x and .tmp share is the one
+      # species column x already has, which is what the default picked
+      x <- merge(data.table::as.data.table(x),
+                 data.table::as.data.table(.tmp),
+                 by = test,
+                 all.x=TRUE, all.y=FALSE, allow.cartesian=TRUE)
+      x <- as.data.frame(x)
+    }
+    if(".value" %in% names(x) && !"WEIGHT_PERCENT" %in% names(x)){
+      x$WEIGHT_PERCENT <- x$.value
+    }
+
+    #pass via as.speciate to build rsp_x
+    # note: this replaces previous local testing
+    # (test.rsp can be passed through ... ; defaults to TRUE)
+    test.rsp <- if("test.rsp" %in% names(.x.args)){
+      .x.args$test.rsp
+    } else {
+      TRUE
+    }
+    x <- as.respeciate(x, test.rsp=test.rsp)
+    #slip in rsp_x tag
+    class(x) <- unique(c("rsp_x", class(x)))
+    x
+  }
+
+
+
+
+
+
+
+
+
+
diff --git a/R/sp.cluster.R b/R/rsp.cluster.R
similarity index 91%
rename from R/sp.cluster.R
rename to R/rsp.cluster.R index 722ebbe..ff9f3b0 100644 --- a/R/sp.cluster.R +++ b/R/rsp.cluster.R @@ -1,20 +1,20 @@ -#' @name sp.cluster -#' @title sp_profile clustering -#' @aliases sp_profile_distance +#' @name rsp.cluster +#' @title (re)SPECIATE profile cluster analysis methods +#' @aliases rsp_distance_profile -#' @description sp_profile functions for studying similarities (or -#' dissimilarities) within multi-profile (re)SPECIATE data sets +#' @description (re)SPECIATE functions for studying similarities (or +#' dissimilarities) within (re)SPECIATE data sets -#' @description \code{\link{sp_profile_distance}} calculates the statistical distance +#' @description \code{\link{rsp_distance_profile}} calculates the statistical distance #' between re(SPECIATE) profiles, and clusters profiles according to nearness. -#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +#' @param rsp A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) #' profiles. #' @param output Character vector, required function output: \code{'report'} the #' calculated distance matrix; \code{'plot'} a heat map of that distance #' matrix. #' @note Please note: function in development; structure and arguments may be #' subject to change. -#' @return Depending on the \code{output} option, \code{sp_profile_distance} returns +#' @return Depending on the \code{output} option, \code{rsp_distance_profile} returns #' one or more of the following: the correlation matrix, a heat map of the #' correlation matrix. @@ -32,9 +32,6 @@ #NOTE -#' @rdname sp.cluster -#' @export - # using data.table for dcast # start build the code for the matching method @@ -45,33 +42,34 @@ # needs thinking through # needs options/formals -#output like in sp_species_cor +#output like in rsp_cor_species #also check through and consider other options in sp_profile_cor #currently tracking #think about how we handle too-big matrices, e.g.
-# aa <- sp_profile(sp_find_profile("ae6", by="profile_type")) -# sp_profile_distance(aa) - +# aa <- rsp_profile(rsp_find_profile("ae6", by="profile_type")) +# rsp_distance_profile(aa) #test ###################### -#aa <- sp_profile(sp_find_profile("ae8", by="profile_type")) -#sp_profile_distance(aa) +#aa <- rsp_profile(rsp_find_profile("ae8", by="profile_type")) +#rsp_distance_profile(aa) +#' @rdname rsp.cluster +#' @export -sp_profile_distance <- function(x, output = c("plot", "report")){ +rsp_distance_profile <- function(rsp, output = c("plot", "report")){ #add .value if missing - x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(rsp) # make by profile (rows) by species (columns) data.frame # move profile_code to row.names for heatmap - .x <- sp_dcast(x, widen = "species") + .x <- rsp_dcast(x, widen = "species") .tmp <- .x[-1:-2] row.names(.tmp) <- .x[,1] diff --git a/R/sp.cor.R b/R/rsp.cor.R similarity index 90% rename from R/sp.cor.R rename to R/rsp.cor.R index 8c14e9c..01041d0 100644 --- a/R/sp.cor.R +++ b/R/rsp.cor.R @@ -1,13 +1,13 @@ -#' @name sp.cor +#' @name rsp.cor #' @title (re)SPECIATE Species Correlations -#' @aliases sp_species_cor +#' @aliases rsp_cor_species -#' @description sp_species functions for studying relationships between -#' species in multi-profile (re)SPECIATE data sets. +#' @description (re)SPECIATE functions for studying relationships between +#' species in (re)SPECIATE data sets. -#' @description \code{\link{sp_species_cor}} generates a by-species correlation +#' @description \code{\link{rsp_cor_species}} generates a by-species correlation #' matrix of the supplied (re)SPECIATE data sets. -#' @param x \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +#' @param rsp \code{respeciate} object, a \code{data.frame} of re(SPECIATE) #' profiles. #' @param min.n \code{numeric} (default 3), the minimum number of species measurements #' needed in a profile for the function to use it in correlation calculations. 
@@ -36,7 +36,7 @@ #' output. Options include: \code{'silent'} (default), to return the #' correlation matrix invisibly; \code{TRUE} to return the matrix #' (visibly); and, \code{FALSE} to not return it. -#' @return By default \code{sp_species_cor} invisibly returns the calculated +#' @return By default \code{rsp_cor_species} invisibly returns the calculated #' correlation matrix a plots it as a heat map, but arguments including #' \code{heatmap} and \code{report} can be used to modify function outputs. @@ -48,7 +48,7 @@ # PLUS when done maybe for make local function so same # can be used in other similar functions -#' @rdname sp.cor +#' @rdname rsp.cor #' @export # using data.table for dcast @@ -65,6 +65,10 @@ #to think about ######################### +#changed name from rsp_species_cor to rsp_cor_species +# rsp_species_cor +# then you could also have rsp_cor_profile + #speed up the correlation calculation # currently painful using for loop # can't use stats::cor on whole data set and include min.n option @@ -108,15 +112,20 @@ # The list options are not yet done... -#aa <- sp_profile(sp_find_profile("ae8", by="profile_type")) -#sp_species_cor(aa) +#aa <- rsp_profile(rsp_find_profile("ae8", by="profile_type")) +#rsp_cor_species(aa) + +#rsp_cor_species(rsp_q_pm.ae8()) -sp_species_cor <- function(x, min.n = 3, +rsp_cor_species <- function(rsp, min.n = 3, cols = c("#80FFFF", "#FFFFFF", "#FF80FF"), na.col = "#CFCFCF", heatmap.args = TRUE, key.args = TRUE, report = "silent"){ + + x <- rsp #quick fix for now + #if ref missing - .x <- sp_dcast(x, widen="species") + .x <- rsp_dcast(x, widen="species") #no point doing any have less than min.n values? 
.test <- apply(.x, 2, function(x) length(.x[!is.na(x)])) @@ -210,7 +219,7 @@ sp_species_cor <- function(x, min.n = 3, if(is.list(key.args)){ .k[names(key.args)] <- key.args } - do.call(rsp_col_key, .k) + do.call(.rsp_col_key, .k) } } diff --git a/R/sp.info.R b/R/rsp.info.R similarity index 70% rename from R/sp.info.R rename to R/rsp.info.R index 58cef31..671e9b2 100644 --- a/R/sp.info.R +++ b/R/rsp.info.R @@ -1,15 +1,13 @@ -#' @name sp.info +#' @name rsp.info #' @title re(SPECIATE) information -#' @aliases sp_info sp_profile_info sp_species_info sp_find_profile -#' sp_find_species +#' @aliases rsp_info rsp_profile_info rsp_species_info rsp_find_profile +#' rsp_find_species ########################### #keep think about the names ########################### -# sp_profile_info used to be sp_find_profile -# sp_species_info used to be sp_find_species -# both sp_find_ functions are currently sp_info_ wrappers -# should remove at some point??? +# rsp_profile_info/ rsp_find_profile +# rsp_species_info/ rsp_find_species ######################### #to think about @@ -20,36 +18,36 @@ #' @description Functions that provide (re)SPECIATE #' source information. -#' \code{sp_info} generates a brief version report for the currently installed +#' \code{rsp_info} generates a brief version report for the currently installed #' (re)SPECIATE data sets. -#' \code{sp_profile_info} searches the currently installed (re)SPECIATE +#' \code{rsp_profile_info} searches the currently installed (re)SPECIATE #' data sets for profile records. -#' \code{sp_species_info} searches the currently installed (re)SPECIATE +#' \code{rsp_species_info} searches the currently installed (re)SPECIATE #' data sets for species records. #' @param ... character(s), any search term(s) to use when searching #' the local (re)SPECIATE archive for relevant records using -#' \code{sp_profile_info} or \code{sp_species_info}. +#' \code{rsp_profile_info} or \code{rsp_species_info}. 
#' @param by character, the section of the archive to -#' search, by default \code{'keywords'} for \code{sp_profile_info} and +#' search, by default \code{'keywords'} for \code{rsp_profile_info} and #' \code{'species_names'} for \code{sp_species_info}. #' @param partial logical, if \code{TRUE} (default) -#' \code{sp_profile_info} or \code{sp_profile_info} use partial matching. +#' \code{rsp_profile_info} or \code{rsp_species_info} use partial matching. -#' @return \code{sp_info} provides a brief version information report on the +#' @return \code{rsp_info} provides a brief version information report on the #' currently installed (re)SPECIATE archive. -#' @return \code{sp_profile_info} returns a \code{data.frame} of +#' @return \code{rsp_profile_info} returns a \code{data.frame} of #' profile information, as a \code{respeciate} object. -#' \code{sp_species_info} returns a \code{data.frame} of +#' \code{rsp_species_info} returns a \code{data.frame} of #' species information as a \code{respeciate} object. #' @examples \dontrun{ #' profile <- "Ethanol" -#' pr <- sp_find_profile(profile) +#' pr <- rsp_find_profile(profile) #' pr #' #' species <- "Ethanol" -#' sp <- sp_find_species(species) +#' sp <- rsp_find_species(species) #' sp} #' @@ -58,7 +56,7 @@ ####################### -#sp_info +#rsp_info ####################### # tidy output??? @@ -70,10 +68,10 @@ # this is not currently catchable!!!! 
# a <- sp_info() #a = NULL -#' @rdname sp.info +#' @rdname rsp.info #' @export -sp_info <- function() { +rsp_info <- function() { #extract profile info from archive .ver <- "source: SPECIATE 5.2\n\t[in (re)SPECIATE since 0.2.0]" .ver <- paste(.ver, "\n\t[now (re)SPECIATE ", packageVersion("respeciate"), "]", sep="") @@ -83,10 +81,10 @@ sp_info <- function() { } -#' @rdname sp.info +#' @rdname rsp.info #' @export -sp_profile_info <- function(..., by = "keywords", partial = TRUE) { +rsp_profile_info <- function(..., by = "keywords", partial = TRUE) { #extract profile info from archive out <- sysdata$PROFILES terms <- c(...) @@ -112,7 +110,7 @@ sp_profile_info <- function(..., by = "keywords", partial = TRUE) { species <- sysdata$SPECIES ref <- out$PROFILE_CODE for(ti in terms){ - ans <- sp_find_species(ti, by=by, partial=partial) + ans <- rsp_species_info(ti, by=by, partial=partial) terms <- species$PROFILE_CODE[species$SPECIES_ID %in% ans$SPECIES_ID] ref <- ref[ref %in% terms] } @@ -129,23 +127,24 @@ sp_profile_info <- function(..., by = "keywords", partial = TRUE) { } } } - out <- rsp_build_respeciate(out) + out <- .rsp_build_respeciate(out) + class(out) <- unique(c("rsp_pi", class(out))) return(out) } -#' @rdname sp.info +#' @rdname rsp.info #' @export #wrapper for above -sp_find_profile <- function(...){ - sp_profile_info(...) +rsp_find_profile <- function(...){ + rsp_profile_info(...) } -#' @rdname sp.info +#' @rdname rsp.info #' @export -sp_species_info <- function(..., by = "species_name", partial = TRUE) { +rsp_species_info <- function(..., by = "species_name", partial = TRUE) { #extract species info from archive out <- sysdata$SPECIES_PROPERTIES terms <- c(...) 
@@ -160,15 +159,16 @@ sp_species_info <- function(..., by = "species_name", partial = TRUE) { } } #out <- PROFILES[grep(term, PROFILES[[by]], ignore.case = TRUE), ] - out <- rsp_build_respeciate(out) + out <- .rsp_build_respeciate(out) + class(out) <- unique(c("rsp_si", class(out))) return(out) } -#' @rdname sp.info +#' @rdname rsp.info #' @export #wrapper for above -sp_find_species <- function(...){ - sp_species_info(...) +rsp_find_species <- function(...){ + rsp_species_info(...) } diff --git a/R/sp.match.R b/R/rsp.match.R similarity index 81% rename from R/sp.match.R rename to R/rsp.match.R index d463994..c999666 100644 --- a/R/sp.match.R +++ b/R/rsp.match.R @@ -1,34 +1,34 @@ -#' @name sp.match +#' @name rsp.match #' @title Find nearest matches from reference set of profiles -#' @aliases sp_match_profile - -#' @description \code{sp_match_profile} compares a supplied species -#' (re)SPECIATE profile and a reference set of supplied profiles and -#' attempt to identify nearest matches on the basis of correlation -#' coefficient. -#' @param x A \code{respeciate} object or similar \code{data.frame} containing -#' a species profile to be compared with profiles in \code{ref}. If \code{x} +#' @aliases rsp_match_profile + +#' @description \code{rsp_match_profile} compares a supplied species +#' (re)SPECIATE profile (or similar data set) and a reference set of +#' supplied profiles and attempt to identify nearest matches on the +#' basis of similarity. +#' @param rsp A \code{respeciate} object or similar \code{data.frame} containing +#' a species profile to be compared with profiles in \code{ref}. If \code{rsp} #' contains more than one profile, these are averaged (using -#' \code{\link{sp_average_profile}}), and the average compared. +#' \code{\link{rsp_average_profile}}), and the average compared. 
#' @param ref A \code{respeciate} object, a \code{data.frame} containing a #' multiple species profiles, to be used as reference library when identifying -#' nearest matches for \code{x}. +#' nearest matches for \code{rsp}. #' @param matches Numeric (default 10), the maximum number of profile matches to #' report. #' @param rescale Numeric (default 5), the data scaling method to apply before -#' comparing \code{x} and profiles in \code{ref}: options 0 to 5 handled by -#' \code{\link{sp_rescale}}. +#' comparing \code{rsp} and profiles in \code{ref}: options 0 to 5 handled by +#' \code{\link{rsp_rescale}}. #' @param min.n \code{numeric} (default 8), the minimum number of paired #' species measurements in two profiles required for a match to be assessed. -#' See also \code{\link{sp_species_cor}}. +#' See also \code{\link{rsp_cor_species}}. #' @param method Character (default 'pd'), the similarity measure to use, current -#' options 'pd', the Pearson's Distance (1- Pearson's correlation coefficient), +#' options 'pd', the Pearson's Distance (1 - Pearson's correlation coefficient), #' or 'sid', the Standardized Identity Distance (See References). -#' @param test.x Logical (default FALSE). The match process self-tests by adding -#' \code{x} to \code{ref}, which should generate a perfect fit=0 score. Setting -#' \code{test.x} to \code{TRUE} retains this as an extra record. -#' @return \code{sp_match_profile} returns a fit report: a \code{data.frame} of -#' up to \code{n} fit reports for the nearest matches to \code{x} from the +#' @param test.rsp Logical (default FALSE). The match process self-tests by adding +#' \code{rsp} to \code{ref}, which should generate a perfect fit=0 score. Setting +#' \code{test.rsp} to \code{TRUE} retains this as an extra record. +#' @return \code{rsp_match_profile} returns a fit report: a \code{data.frame} of +#' up to \code{n} fit reports for the nearest matches to \code{rsp} from the #' reference profile data set, \code{ref}. 
#' @references Distance metrics are based on recommendations by Belis et al (2015) #' and as implemented in Mooibroek et al (2022): @@ -46,7 +46,7 @@ #NOTE -#' @rdname sp.match +#' @rdname rsp.match #' @export ###################### @@ -54,16 +54,25 @@ #find ref profile 'most similar to x ###################### -# the sp_dcast uses data.table -# sp_match uses rbindlist from data.table +############################ +############################ +##need to go through notes and code and tidy +## this is first goes at x->rsp +## will need tidy and rethink? +########################### +########################### + + +# the rsp_dcast uses data.table +# rsp_match uses rbindlist from data.table #in development #to do ######################### -##aa <- sp_profile(sp_find_profile("composite", by="profile_name")) -##sp_match_profile(sp_profile("41220C"), aa) +##aa <- rsp_profile(rsp_find_profile("composite", by="profile_name")) +##rsp_match_profile(rsp_profile("41220C"), aa) ##assuming 41220C exists ##NOTE sp_profile code is case sensitive @@ -79,11 +88,11 @@ # when (I guess) nothing there to compare... #default for ref -# using sp_profile(sp_find_profile("composite", by="profile_name")) +# using rsp_profile(rsp_find_profile("composite", by="profile_name")) # in example. #could add error if x is more than one profile -# could use sp_profile_mean when written if we want to force to one +# could use rsp_profile_mean when written if we want to force to one # one profile [?? nb: that function name not decided yet] #do we want an output option? @@ -91,8 +100,8 @@ #option to exclude test? 
from report -#how can users make x if not from (re)SPECIATE archive -# currently using rsp_ [code after this function] +#how can users make rsp if not from (re)SPECIATE archive +# currently using .rsp_ [code after this function] # to anonymise a speciate profile # suggestion: # identify needed columns, formats and names @@ -114,18 +123,18 @@ # could also do this earlier if min.bin set in formals # but might need to rethink n, min.bin, etc??? -sp_match_profile <- function(x, ref, matches=10, rescale=5, - min.n=8, method = "pd", test.x=FALSE){ +rsp_match_profile <- function(rsp, ref, matches=10, rescale=5, + min.n=8, method = "pd", test.rsp=FALSE){ ####################### #if ref missing ################## #to do - # using sp_profile(sp_find_profile("composite", by="profile_name")) + # using rsp_profile(rsp_find_profile("composite", by="profile_name")) # looked promising #add .value if not there - x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(rsp) #tidy x for testing #.x.pr.cd <- as.character(x$PROFILE_CODE) @@ -136,9 +145,9 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, # might think about changing this in future if(length(unique(x$PROFILE_CODE))>1){ - x <- sp_average_profile(x, code = "test") + x <- rsp_average_profile(x, code = "test") } else { - x <- sp_average_profile(x, code = "test", + x <- rsp_average_profile(x, code = "test", name = paste("test>", x$PROFILE_NAME[1], sep="")) } @@ -152,7 +161,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, ############### #do test anyway ############### - #if(test.x){ + #if(test.rsp){ matches <- matches + 1 #} @@ -184,7 +193,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, # cols are there??? 
#.tmp <- data.table::as.data.table(sp_rescale_species(.tmp, method=rescale)) - .tmp <- data.table::as.data.table(sp_rescale_profile(.tmp, method=rescale)) + .tmp <- data.table::as.data.table(rsp_rescale_profile(.tmp, method=rescale)) ################### #keep species names and ids for renaming @@ -210,7 +219,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, # na.rm=TRUE, # value.var = ".value") - .tmp <- data.table::as.data.table(sp_dcast(.tmp, widen="profile")) + .tmp <- data.table::as.data.table(rsp_dcast(.tmp, widen="profile")) #nb: need the as.data.table() because sp_profile_dcast # currently returns data.frame @@ -242,7 +251,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, #min n ######################### #to do - # compare this and code in sp_species_cor + # compare this and code in rsp_cor_species # if/when we deal with this stop message this code may need to be updated ######################### @@ -359,7 +368,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, } if(!is.function(f)){ - stop("RSP> sp_match_profile 'method' unknown", call. = FALSE) + stop("RSP> rsp_match_profile 'method' unknown", call. = FALSE) } .out <- .tmp[, (.cols) := lapply(.SD, f), .SDcols = .cols] @@ -387,7 +396,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, if(length(.out)<1){ #see notes.... #sometimes this is because there are less than min.n species in the x profile - stop("sp_match_profile> No (", min.n, " point) matches for x", call. = FALSE) + stop("rsp_match_profile> No (", min.n, " point) matches for rsp", call. = FALSE) } .tmp <- names(.out) @@ -405,7 +414,7 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, row.names = 1:length(.out)) #conflicted!!! 
- if(!test.x){ + if(!test.rsp){ matches <- matches - 1 if("test" %in% x$PROFILE_CODE){ .out <- .out[tolower(.out$PROFILE_CODE)!="test",] @@ -430,21 +439,6 @@ sp_match_profile <- function(x, ref, matches=10, rescale=5, -#need something to replace this that helps users build local profiles - -#basic build needs -# profile_name and profile_code -# species_name and species_id -# weight_percent (and possibly .value) - -rsp_ <- function(x){ - .o <- sp_profile(x) - .o$PROFILE_NAME <- paste("test", .o$PROFILE_NAME, sep=">") - .o$PROFILE_CODE <- "test" - .o -} - - diff --git a/R/sp.pad.R b/R/rsp.pad.R similarity index 55% rename from R/sp.pad.R rename to R/rsp.pad.R index 97aeb84..85d7d8d 100644 --- a/R/sp.pad.R +++ b/R/rsp.pad.R @@ -1,32 +1,32 @@ -#' @name sp.pad +#' @name rsp.pad #' @title (re)SPECIATE profile padding functions -#' @aliases sp_pad +#' @aliases rsp_pad #' @description Functions for padding \code{respeciate} objects. -#' @description \code{sp_pad} pads a supplied (re)SPECIATE profile data set +#' @description \code{rsp_pad} pads a supplied (re)SPECIATE profile data set #' with profile and species meta-data. -#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +#' @param rsp A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) #' profiles. #' @param pad character, type of meta data padding, current options -#' \code{'profile'}, \code{'species'}, \code{'weight'}, \code{reference}, +#' \code{'profile'}, \code{'species'}, \code{'weight'}, \code{'reference'}, #' \code{'standard'} (default; all but \code{'reference'}), and \code{'all'} -#' ('all'). +#' (all). #' @param drop.nas logical, discard any rows where \code{WEIGHT_PERCENT} is #' \code{NA}, default \code{TRUE}. -#' @return \code{sp_pad} returns \code{x}, with requested additional profile -#' and species meta-data added as additional \code{data.frame} columns. -#' See Note. 
+#' @return \code{rsp_pad} returns supplied \code{respeciate} data set, with +#' requested additional profile and species meta-data added as additional +#' \code{data.frame} columns. See Note. #' @note Some data handling can remove (re)SPECIATE meta-data, -#' and \code{sp_pad}s provide a quick rebuild/repair. For example, -#' \code{\link{sp_dcast}}ing to a (by-species or by-profile) widened +#' and \code{rsp_pad}s provide a quick rebuild/repair. For example, +#' \code{\link{rsp_dcast}}ing to a (by-species or by-profile) widened #' form strips some meta-data, and padding is used as part of the -#' \code{\link{sp_melt_wide}} and padding is used to re-add this meta-data +#' \code{\link{rsp_melt_wide}} to re-add this meta-data #' when returning the data set to its standard long form. #NOTE -#' @rdname sp.pad +#' @rdname rsp.pad #' @export ## #' @import data.table # now done in xxx.r @@ -80,14 +80,14 @@ #x <- sp_pad(c) -sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ - +rsp_pad <- function(rsp, pad = "standard", drop.nas = TRUE){ #should pad allow TRUE/FALSE??? #should argument within sp_pad be method?? 
- #tidy x - x <- rsp_tidy_profile(x) + #tidy rsp + x <- .rsp_tidy_profile(rsp) + #save class .cls <- class(x) out <- data.table::as.data.table(x) @@ -97,7 +97,7 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ PROFILES <- data.table::as.data.table(sysdata$PROFILES) .tmp <- intersect(names(out), names(PROFILES)) if(length(.tmp)>0){ - out <- merge(out, PROFILES, by = .tmp, all.y=FALSE, + out <- data.table::merge.data.table(out, PROFILES, by = .tmp, all.y=FALSE, all.x=TRUE, allow.cartesian=TRUE) } } @@ -105,25 +105,28 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ #species if(any(c("species", "standard", "all") %in% tolower(pad))){ SPECIES_PROPERTIES <- data.table::as.data.table(sysdata$SPECIES_PROPERTIES) + SPECIES_PROPERTIES$SPECIES_ID <- as.character(SPECIES_PROPERTIES$SPECIES_ID) .tmp <- intersect(names(out), names(SPECIES_PROPERTIES)) - print(.tmp) if(length(.tmp) >0){ - out <- merge(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE, + out <- data.table::merge.data.table(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE, all.x=TRUE, allow.cartesian=TRUE) } } #species weights + # might not want to add weights if rsp_x??? 
if(any(c("weight", "weights", "standard", "all") %in% tolower(pad))){ SPECIES <- data.table::as.data.table(sysdata$SPECIES) + SPECIES$SPECIES_ID <- as.character(SPECIES$SPECIES_ID) .tmp <- intersect(names(out), names(SPECIES)) if(length(.tmp) >0){ - out <- merge(out, SPECIES, by = .tmp, all.y=FALSE, + out <- data.table::merge.data.table(out, SPECIES, by = .tmp, all.y=FALSE, all.x=TRUE, allow.cartesian=TRUE) } } - - #return(out) + if(all(is.na(out$WEIGHT_PERCENT)) && ".value" %in% names(out)){ + out$WEIGHT_PERCENT <- out$.value + } #references if(any(c("reference", "references", "all") %in% tolower(pad))){ @@ -131,12 +134,12 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ REFERENCES <- data.table::as.data.table(sysdata$REFERENCES) .tmp <- intersect(names(out), names(PROFILE_REFERENCE)) if(length(.tmp) >0){ - out <- merge(out, PROFILE_REFERENCE, by = .tmp, all.y=FALSE, + out <- data.table::merge.data.table(out, PROFILE_REFERENCE, by = .tmp, all.y=FALSE, all.x=TRUE, allow.cartesian=TRUE) } .tmp <- intersect(names(out), names(REFERENCES)) if(length(.tmp) >0){ - out <- merge(out, REFERENCES, by = .tmp, all.y=FALSE, + out <- data.table::merge.data.table(out, REFERENCES, by = .tmp, all.y=FALSE, all.x=TRUE, allow.cartesian=TRUE) } } @@ -148,6 +151,7 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ # in the SPECIATE archive ################################################# #drop.nas. 
+ if(drop.nas){ out <- out[!is.na(out$WEIGHT_PERCENT),] } @@ -161,7 +165,9 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ # could return as input class # see notes out <- as.data.frame(out) - rsp_build_respeciate(out) + #.rsp_build_respeciate(out) + class(out) <- .cls + out } @@ -175,62 +181,68 @@ sp_pad <- function(x, pad = "standard", drop.nas = TRUE){ #holding until testing on new code finished -sp_pad.old <- function(x, pad = "species", drop.nas = TRUE){ - - #tidy x - x <- rsp_tidy_profile(x) - #save class - .cls <- class(x) - out <- data.table::as.data.table(x) - - #set up padding for melts... - .long <- "nothing" - if(pad=="species"){ - .long <- "SPECIES_NAME" - } - if(pad=="profile"){ - .long <- "PROFILE_CODE" - } - - PROFILES <- data.table::as.data.table(sysdata$PROFILES) - SPECIES_PROPERTIES <- data.table::as.data.table(sysdata$SPECIES_PROPERTIES) - if(.long=="PROFILE_CODE"){ - #add in profile then species info - out <- merge(out, PROFILES, by = .long, all.y=FALSE, - all.x=TRUE, allow.cartesian=TRUE) - .tmp <- intersect(names(out), names(SPECIES_PROPERTIES)) - out <- merge(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE, - all.x=TRUE, allow.cartesian=TRUE) - } - if(.long=="SPECIES_NAME"){ - #add in species then profiles info - out <- merge(out, SPECIES_PROPERTIES, by = .long, all.y=FALSE, - all.x=TRUE, allow.cartesian=TRUE) - .tmp <- intersect(names(out), names(PROFILES)) - out <- merge(out, PROFILES, by = .tmp, all.y=FALSE, - all.x=TRUE, allow.cartesian=TRUE) - } - #to get weight_percentage etc - if(pad %in% c("species", "profile", "weight")){ - SPECIES <- data.table::as.data.table(sysdata$SPECIES) - .tmp <- intersect(names(out), names(SPECIES)) - out <- merge(out, SPECIES, by = .tmp, all.y=FALSE, - all.x=TRUE, allow.cartesian=TRUE) - } else { - #not great but... - #if not padding WEIGHT_PERCENT has to be .value - out$WEIGHT_PERCENT <- out$.value - } - #drop.nas. 
- if(drop.nas){ - out <- out[!is.na(out$WEIGHT_PERCENT),] - } - - #not sure how to handle output... - #see notes - out <- as.data.frame(out) - rsp_build_respeciate(out) -} +#sp_pad.old <- function(x, pad = "species", drop.nas = TRUE){ +# +# #tidy x +# x <- rsp_tidy_profile(x) +# #save class +# .cls <- class(x) +# out <- data.table::as.data.table(x) +# +# #set up padding for melts... +# .long <- "nothing" +# if(pad=="species"){ +# .long <- "SPECIES_NAME" +# } +# if(pad=="profile"){ +# .long <- "PROFILE_CODE" +# } +# +# PROFILES <- data.table::as.data.table(sysdata$PROFILES) +# SPECIES_PROPERTIES <- data.table::as.data.table(sysdata$SPECIES_PROPERTIES) +# if(.long=="PROFILE_CODE"){ +# #add in profile then species info +# out <- merge(out, PROFILES, by = .long, all.y=FALSE, +# all.x=TRUE, allow.cartesian=TRUE) +# .tmp <- intersect(names(out), names(SPECIES_PROPERTIES)) +# out <- merge(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE, +# all.x=TRUE, allow.cartesian=TRUE) +# } +# if(.long=="SPECIES_NAME"){ +# #add in species then profiles info +# out <- merge(out, SPECIES_PROPERTIES, by = .long, all.y=FALSE, +# all.x=TRUE, allow.cartesian=TRUE) +# .tmp <- intersect(names(out), names(PROFILES)) +# out <- merge(out, PROFILES, by = .tmp, all.y=FALSE, +# all.x=TRUE, allow.cartesian=TRUE) +# } +# #to get weight_percentage etc +# if(pad %in% c("species", "profile", "weight")){ +# SPECIES <- data.table::as.data.table(sysdata$SPECIES) +# .tmp <- intersect(names(out), names(SPECIES)) +# out <- merge(out, SPECIES, by = .tmp, all.y=FALSE, +# all.x=TRUE, allow.cartesian=TRUE) +# } else { +# #not great but... +# #if not padding WEIGHT_PERCENT has to be .value +# out$WEIGHT_PERCENT <- out$.value +# } +# #drop.nas. +# if(drop.nas){ +# out <- out[!is.na(out$WEIGHT_PERCENT),] +# } +# ############################# +# ##thinking about +# ## removing all columns of just NAs... +# # out[,which(unlist(lapply(out, function(x)!all(is.na(x))))), with=FALSE] +# ## works if data.table object... 
+# ############################ +# +# #not sure how to handle output... +# #see notes +# out <- as.data.frame(out) +# rsp_build_respeciate(out) +#} diff --git a/R/sp.plot.R b/R/rsp.plot.R similarity index 81% rename from R/sp.plot.R rename to R/rsp.plot.R index d555fcf..f245c1c 100644 --- a/R/sp.plot.R +++ b/R/rsp.plot.R @@ -1,18 +1,18 @@ -#' @name sp.plot +#' @name rsp.plot #' @title plotting (re)SPECIATE profiles -#' @aliases sp_plot_profile sp_plot_species +#' @aliases rsp_plot_profile rsp_plot_species #' @description General plots for \code{respeciate} objects. -#' @description \code{sp_plot} functions generate plots for supplied +#' @description \code{rsp_plot} functions generate plots for supplied #' (re)SPECIATE data sets. -#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +#' @param rsp A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) #' profiles. #' @param id numeric, the indices of profiles or species to use when -#' plotting with \code{sp_plot_profile} or \code{sp_plot_species}, -#' respectively. For example, \code{sp_plot_profile(x, id=1:6)} plots -#' first 6 profiles in \code{respeciate} object \code{x}. -#' @param multi.profile character, how \code{sp_plot_profile} should +#' plotting with \code{rsp_plot_profile} or \code{rsp_plot_species}, +#' respectively. For example, \code{rsp_plot_profile(rsp, id=1:6)} plots +#' first 6 profiles in \code{respeciate} object \code{rsp}. +#' @param multi.profile character, how \code{rsp_plot_profile} should #' handle multiple profiles, e.g. 'group' or 'panel' (default #' group). 
#' @param order logical, order the species in the @@ -43,14 +43,14 @@ #functions -# sp_plot_profile -# sp_plot_species +# rsp_plot_profile +# rsp_plot_species -# plot.respeciate is wrapper for sp_plot_profile +# plot.respeciate is wrapper for rsp_plot_profile #uses unexported code -# rsp_plot_fix -# rsp_yscale.component.log10 (currently in sp.pls.r) +# .rsp_plot_fix +# .rsp_yscale.component.log10 (currently in rsp.pls.r) @@ -70,11 +70,11 @@ #examples # maybe -# sp_plot_profile(spq_pm.ae8()) +# rsp_plot_profile(spq_pm.ae8()) # (allows most lattice style plot control, etc key=list(...)) # (but includes some short cuts to common handling, e.g. log=T to # log y scales and reformat y axes) -# sp_plot_profile(spq_pm.ae8(), key=list(space="top", columns=2), log=T) +# rsp_plot_profile(rsp_q_pm.ae8(), key=list(space="top", columns=2), log=T) #color defaults... #issue current default wraps if you exceed number of cols in default set. @@ -83,23 +83,20 @@ ## ?? could extrapolate the default colors using something like above ??? - # dennis asked for data as part of return # that is do-able but may need an object class # (maybe like the openair code...) -#thinking about an sp_plot_compare(x, y) +#thinking about an rsp_plot_compare(x, y) # to compare profile x and profile(s) y # started project (in own-notes) ################################### -#sp_plot_profile +#rsp_plot_profile ################################### -#' @rdname sp.plot -#' @export -# now done in xxx.r +# now any unexported code (.rsp...) should be in xxx.r ########################## #notes @@ -107,12 +104,12 @@ #moved this to lattice for paneling option ############################ -#using rsp_plot_fix for warning/handling for +#using .rsp_plot_fix for warning/handling for # duplicate species in profiles (handling merge/mean) # duplicated profile names (handling make unique) ############################# -#using rsp_test_profile +#using .rsp_test_profile # when ordering... 
#see in code notes about jobs @@ -126,11 +123,15 @@ # what is x, how is it formatted, etc # then same for y, groups and cond... -sp_plot_profile <- function(x, id, multi.profile = "group", +#' @rdname rsp.plot +#' @export + +rsp_plot_profile <- function(rsp, id, multi.profile = "group", order=TRUE, log=FALSE, ..., silent=FALSE){ #setup + x <- rsp ## this needs sorting... .x.args <- list(...) #currently not even trying to stack logs... @@ -139,13 +140,18 @@ sp_plot_profile <- function(x, id, multi.profile = "group", ###################### #to do #document issues - stop("RSP> Sorry, currently not stacking logs.", + stop("RSP> Sorry, currently not stacking log plots", call. = FALSE) } } #others refs - #.sp.ord <- unique(x$SPECIES_ID) - .sp.pro <- unique(x$PROFILE_CODE) + #was profile_code; changed to profile_name + # might be an issue; some names not unique... + .sp.pro <- if(is.factor(x$PROFILE_NAME)) { + levels(x$PROFILE_NAME) + } else { + unique(x$PROFILE_NAME) + } #n/profile handling profile <- if (missing(id)) { .sp.pro @@ -173,11 +179,12 @@ sp_plot_profile <- function(x, id, multi.profile = "group", } profile <- profile[1:6] } - x <- x[x$PROFILE_CODE %in% profile,] + x <- x[x$PROFILE_NAME %in% profile,] + #check for duplicates, etc... #tidy naming etc... - x <- rsp_plot_fix(x, silent=silent, ...) + x <- .rsp_plot_fix(x, silent=silent, ...) ##test something to plot if(nrow(x)==0){ @@ -194,8 +201,9 @@ sp_plot_profile <- function(x, id, multi.profile = "group", #switching profile from profile_code to profile_name... # for plot labeling #################################### - profile <- unique(x$PROFILE_NAME) + #profile <- unique(x$PROFILE_NAME) #should think about other naming options??? 
+ #(now using profile_name from start) #order largest to smallest ############################# @@ -207,7 +215,7 @@ sp_plot_profile <- function(x, id, multi.profile = "group", ################################ test <- x test$PROFILE_CODE <- ".default" - test <- rsp_test_profile(test) + test <- .rsp_test_profile(test) #previous barplot had bedside if("stack" %in% names(.x.args) && .x.args$stack){ test <- test[order(test$.total, decreasing = TRUE),] @@ -223,7 +231,12 @@ sp_plot_profile <- function(x, id, multi.profile = "group", x$SPECIES_NAME <- factor(x$SPECIES_NAME, levels = xx) + if(!is.factor(x$PROFILE_NAME)){ + x$PROFILE_NAME <- factor(x$PROFILE_NAME, levels=unique(x$PROFILE_NAME)) + } + +#print(as.data.frame(x)) ################## #profile bar chart ################## @@ -231,7 +244,7 @@ sp_plot_profile <- function(x, id, multi.profile = "group", data=x, ylab="Profile Loading", xlab="", #NB: prepanel seemed to break ylim when stacking panel = function(x, y, origin, ...){ - rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), + .rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), panel.grid, ...) if(missing(origin)){ ###################################### @@ -272,7 +285,7 @@ sp_plot_profile <- function(x, id, multi.profile = "group", #shortcut to scales(y(log)) and yscale.component if(log){ p1.ls$scales$y$log <- 10 - p1.ls$yscale.components <- rsp_yscale.component.log10 + p1.ls$yscale.components <- .rsp_yscale.component.log10 } #3. 
extra user settings @@ -284,7 +297,7 @@ sp_plot_profile <- function(x, id, multi.profile = "group", if("col" %in% names(p1.ls)){ if(is.function(p1.ls$col)){ p1.ls$col <- if("groups" %in% names(p1.ls)){ - p1.ls$col(length(profile)) + p1.ls$col(length(levels(x$PROFILE_NAME))) } else { p1.ls$col(1) } @@ -292,7 +305,7 @@ sp_plot_profile <- function(x, id, multi.profile = "group", } else { p1.ls$col <- if("groups" %in% names(p1.ls)){ rep(trellis.par.get("superpose.polygon")$col, - length.out=length(profile)) + length.out=length(levels(x$PROFILE_NAME))) } else { trellis.par.get("superpose.polygon")$col[1] } @@ -315,10 +328,10 @@ sp_plot_profile <- function(x, id, multi.profile = "group", #like to track border as well as col... #want to add box behind legend when key in plot... if("groups" %in% names(p1.ls)){ - .tmp <- list(space="right", + .tmp <- list(space="top", #title="Legends", rectangles=list(col=rep(p1.ls$col, - length.out=length(profile))), + length.out=length(levels(x$PROFILE_NAME)))), text = list(profile, cex=0.7)) p1.ls$key <- if("key" %in% names(p1.ls)){ modifyList(.tmp, p1.ls$key) @@ -335,7 +348,7 @@ sp_plot_profile <- function(x, id, multi.profile = "group", #sp_plot_species ################################### -#' @rdname sp.plot +#' @rdname rsp.plot #' @export #in development @@ -343,15 +356,16 @@ sp_plot_profile <- function(x, id, multi.profile = "group", #lot taken straight from sp_plot_profile #so lots of redundancy -sp_plot_species <- function(x, id, multi.species = "group", +rsp_plot_species <- function(rsp, id, multi.species = "group", order = FALSE, log = FALSE, ..., silent = FALSE){ #setup + x <- rsp ## this needs sorting... .x.args <- list(...) ###################################### - #not sure we are using stack for this + #not sure we are using stack for this... ###################################### #currently not even trying to stack logs... 
if("stack" %in% names(.x.args) && .x.args$stack){ @@ -359,7 +373,7 @@ sp_plot_species <- function(x, id, multi.species = "group", ###################### #to do #document issues - stop("RSP> Sorry, currently not stacking logs.", + stop("RSP> Sorry, currently not stacking logs", call. = FALSE) } } @@ -370,9 +384,11 @@ sp_plot_species <- function(x, id, multi.species = "group", #if already factor ??? # user could be forcing order ############################## - .sp.ord <- as.character(unique(x$SPECIES_ID)) - #.sp.pro <- unique(x$PROFILE_CODE) - #n/profile handling + .sp.ord <- if(is.factor(x$SPECIES_NAME)){ + levels(x$SPECIES_NAME) + } else { + as.character(unique(x$SPECIES_NAME)) + } species <- if (missing(id)) { .sp.ord } else { @@ -400,11 +416,11 @@ sp_plot_species <- function(x, id, multi.species = "group", } species <- species[1:20] } - x <- x[x$SPECIES_ID %in% species,] + x <- x[x$SPECIES_NAME %in% species,] #check for duplicates, etc... #tidy naming etc... - x <- rsp_plot_fix(x, silent=silent, ...) + x <- .rsp_plot_fix(x, silent=silent, ...) ##test something to plot if(nrow(x)==0){ @@ -418,15 +434,16 @@ sp_plot_species <- function(x, id, multi.species = "group", } #################################### - #current species ordering by id arg... + #current species ordering by name arg... 
#(see below about reordering) #################################### - species <- species[species %in% unique(x$SPECIES_ID)] - x$SPECIES_ID <- factor(x$SPECIES_ID, levels=species) - x <- x[order(x$SPECIES_ID),] - x$SPECIES_NAME <- factor(x$SPECIES_NAME, unique(x$SPECIES_NAME)) - species<- levels(x$SPECIES_NAME) - sp.ord <- as.numeric(factor(species, levels=sort(species))) + + species <- species[species %in% unique(x$SPECIES_NAME)] + x$SPECIES_NAME <- factor(x$SPECIES_NAME, levels=species) + x <- x[order(x$SPECIES_NAME),] + #sp.ord <- as.numeric(factor(species, levels=sort(species))) + #sp.ord <- 1:length(levels(x$SPECIES_NAME)) + ################################## #should think about other naming options??? @@ -449,7 +466,7 @@ sp_plot_species <- function(x, id, multi.species = "group", ################################ test <- x test$PROFILE_CODE <- ".default" - test <- rsp_test_profile(test) + test <- .rsp_test_profile(test) #previous barplot had bedside if("stack" %in% names(.x.args) && .x.args$stack){ test <- test[order(test$.total, decreasing = TRUE),] @@ -463,6 +480,9 @@ sp_plot_species <- function(x, id, multi.species = "group", } x <- x[c(".value","PROFILE_CODE", "PROFILE_NAME", "SPECIES_NAME")] + + #print(xx) + ################## #species trend line plot ################## @@ -470,7 +490,14 @@ sp_plot_species <- function(x, id, multi.species = "group", #dcast and melt to add in any missed entries as NAs #(to force trend line gaps) #not padding, obviously not dropping nas... 
- x <- sp_melt_wide(sp_dcast_species(x), pad=FALSE, drop.nas = FALSE) + x <- rsp_melt_wide(rsp_dcast_species(x), pad=FALSE, drop.nas = FALSE) + + if(!is.factor(x$PROFILE_NAME)){ + x$PROFILE_NAME <- factor(x$PROFILE_NAME, levels=unique(x$PROFILE_NAME)) + } + if(!is.factor(x$SPECIES_NAME)){ + x$SPECIES_NAME <- factor(x$SPECIES_NAME, levels=unique(x$SPECIES_NAME)) + } ############################### #species handling @@ -512,7 +539,6 @@ sp_plot_species <- function(x, id, multi.species = "group", ############################## #species alignment - p1.ls <- list(x= .value~.x, data=x, ylab="Measurement", xlab=.xlab, type="l", @@ -520,7 +546,7 @@ sp_plot_species <- function(x, id, multi.species = "group", panel = function(x, y, ...){ at.x <- pretty(x) at.y <- pretty(y) - rsp_panelPal("grid", list(h=at.y,v=at.x, col="grey", lty=3), + .rsp_panelPal("grid", list(h=at.y,v=at.x, col="grey", lty=3), panel.abline, ...) panel.xyplot(x=x, y=y, ...) }, @@ -550,7 +576,7 @@ sp_plot_species <- function(x, id, multi.species = "group", #shortcut to scales(y(log)) and yscale.component if(log){ p1.ls$scales$y$log <- 10 - p1.ls$yscale.components <- rsp_yscale.component.log10 + p1.ls$yscale.components <- .rsp_yscale.component.log10 } #3. extra user settings @@ -569,7 +595,7 @@ sp_plot_species <- function(x, id, multi.species = "group", } } else { p1.ls$col <- if("groups" %in% names(p1.ls)){ - colorRampPalette(rainbow(12, s = 0.5, v = 1), + colorRampPalette(rainbow(12, s = 0.5, v = 1)[1:11], interpolate = "spline")(length(species)) #or: #colorRampPalette(rainbow(12, s = 0.5, v = 1),interpolate = "spline")(x) @@ -593,11 +619,15 @@ sp_plot_species <- function(x, id, multi.species = "group", #could be issue here if user uses auto.key??? #like to track border as well as col... 
if("groups" %in% names(p1.ls)){ + #print(x$SPECIES_NAME) .tmp <- list(space="right", #title="Legends", lines=list(col=rep(p1.ls$col, length.out=length(species))), - text = list(levels(x$SPECIES_NAME), cex=0.7)) + ########################## + #text = list(levels(x$SPECIES_NAME), cex=0.7)) + text = list(species, cex=0.7)) + #changed from above because x$SPECIES_NAME p1.ls$key <- if("key" %in% names(p1.ls)){ modifyList(.tmp, p1.ls$key) } else { diff --git a/R/rsp.pls.R b/R/rsp.pls.R new file mode 100644 index 0000000..a79f2a4 --- /dev/null +++ b/R/rsp.pls.R @@ -0,0 +1,1060 @@ +#' @name rsp.pls +#' @title (re)SPECIATE profile Positive Least Squares models +#' @aliases rsp_pls_profile pls_report pls_test pls_fit_species +#' pls_refit_species pls_rebuild + +#' @description Functions for Positive Least Squares (PLS) fitting of +#' (re)SPECIATE profiles + +#' @description +#' \code{rsp_pls_profile} builds PLS models for supplied profile(s) using +#' the \code{\link{nls}} function, the 'port' algorithm and a lower +#' limit of zero for all model outputs to enforce the positive fits. The +#' modeled profiles are typically from an external source, e.g. a +#' measurement campaign, and are fit as a linear additive series of reference +#' profiles, here typically from (re)SPECIATE, to provide a measure of +#' source apportionment based on the assumption that the profiles in the +#' reference set are representative of the mix that makes up the modeled +#' sample. The \code{pls_} functions work with \code{rsp_pls_profile} +#' outputs, and are intended to be used when refining and analyzing +#' these PLS models. See also \code{pls_plot}s for PLS model plots. + +#' @param rsp A \code{respeciate} object, a \code{data.frame} of +#' profiles in standard long form, intended for PLS modelling. +#' @param ref A \code{respeciate} object, a \code{data.frame} of +#' profiles also in standard long form, used as the set of candidate +#' source profiles when fitting \code{rsp}.
+#' @param power A numeric, an exponent applied to the +#' weightings when fitting the PLS model. This is applied in the form +#' \code{weight^power}, so increasing it increases the relative +#' weighting of the more heavily weighted measurements. Values in the +#' range \code{1 - 2.5} are sometimes helpful. +#' @param ... additional arguments, typically ignored or passed on to +#' \code{\link{nls}}. +#' @param pls A \code{rsp_pls_profile} output, intended for use with +#' \code{pls_} functions. +#' @param species for \code{pls_fit_species}, a data.frame of +#' measurements of an additional species to be fitted to an existing +#' PLS model, or for \code{pls_refit_species} a character vector of the +#' names of species already included in the model to be refit. Both are +#' multiple-\code{species} wrappers for \code{pls_rebuild}, a general-purpose +#' PLS fitter that only handles a single \code{species}. +#' @param refit.profile (for \code{pls_fit_species}, \code{pls_refit_species} +#' and \code{pls_rebuild}) logical. When fitting a new \code{species} (or +#' refitting an existing \code{species}), all other species in the reference +#' profiles are held 'as is' and the added \code{species} is fit to the source +#' contribution time-series of the previous PLS model. By default, the full PLS +#' model is then refit using the revised \code{ref} source profile to generate +#' a PLS model based on the revised source profiles (i.e., ref + new species +#' or ref + refit species). However, this second step can be omitted using +#' \code{refit.profile=FALSE} if you want to use the supplied \code{species} +#' as an indicator rather than a standard member of the apportionment model. +#' @param as.marker for \code{pls_rebuild}, \code{pls_fit_species} and +#' \code{pls_refit_species}, \code{logical}, default \code{FALSE}, when +#' fitting (or refitting) a species, treat it as a source marker.
+#' @param drop.missing for \code{pls_rebuild}, \code{pls_fit_species} and +#' \code{pls_refit_species}, \code{logical}, default \code{FALSE}, when +#' building or rebuilding a PLS model, discard cases where \code{species} +#' is missing. + +################################ +# to do... +################################ +# link above to pls plot help page? +# document methods and references + + +#' @return \code{rsp_pls_profile} returns a list of nls models, one per +#' profile/measurement set in \code{rsp}. The \code{pls_} functions work with +#' these outputs. \code{pls_report} generates a \code{data.frame} of +#' model outputs, and is used by several of the other \code{pls_} +#' functions. \code{pls_fit_species}, \code{pls_refit_species} and +#' \code{pls_fit_parent} return the supplied \code{rsp_pls_profile} output, +#' updated on the basis of the \code{pls_} function action. +#' \code{pls_plot}s (documented separately) produce various plots +#' commonly used in source apportionment studies. + + +#' @note This implementation of PLS applies the following modeling constraints: +#' +#' 1. It generates a model of \code{rsp} that is a positively constrained linear +#' combination of the profiles in \code{ref}, so outputs can only be +#' zero or more. Although the model is generated using \code{\link{nls}}, +#' which is a Nonlinear Least Squares (NLS) model, the fitting term applied +#' in this case is linear. +#' +#' 2. The model is fit in the form: +#' +#' \eqn{X_{i,j} = \sum\limits_{k=1}^{K}{N_{i,k} * M_{k,j}} + e_{i,j}} +#' +#' Where X is the data set of measurements, \code{rsp}, M is the data set of +#' reference profiles, \code{ref}, N is the data set of source contributions, +#' the source apportionment solution, to be solved by minimising e, the error terms. +#' +#' 3. The number of species in \code{rsp} must be more than the number of +#' profiles in \code{ref} to reduce the likelihood of over-fitting.
+#'
+
+
+# GENERAL NOTES
+
+# TO DO
+# link to CMB as crude form of CMB and reference?
+
+# these all need code tidying
+
+# check individual function notes
+
+
+############################
+############################
+## rsp_pls_profile
+############################
+############################
+
+## now importing locally where possible
+## data.table::[function]
+## #' @import data.table
+
+#This is version 2
+
+#version 1 combined version2 and pls_report
+#now separated because it simplified pls_ model reworking
+
+#currently keeping the function args
+#   might not need to do this BUT
+#   model does not seem to be tracking them ...
+
+# check power handling is right
+
+#########################
+#think about ?
+#########################
+
+# should first arg be rsp.x rather than x or rsp ???
+
+# maybe get formula into docs ???
+
+# maybe split this into rsp.pls and then separate pls. documents ???
+
+#' @rdname rsp.pls
+#' @export
+
+rsp_pls_profile <- function(rsp, ref,
+                            power = 1,
+                            ...){
+
+  #summary
+  #  fits one positively constrained nls ('port') model per PROFILE_CODE
+  #  in rsp, using the profiles in ref as the model terms
+  #  returns a list of class 'rsp_pls', named by PROFILE_CODE; each element
+  #  is list(mod, args, x.args), or NULL if that profile could not be fitted
+  #  stops if n.profiles in ref >= n.species in rsp
+
+  ##################
+  #quick tidy for now
+  ##################
+  x <- rsp
+
+  ##################
+  #from rough code
+  ##################
+
+  ########################
+  #only allowing profiles < species
+  if(length(unique(ref$PROFILE_CODE)) >= length(unique(x$SPECIES_ID))){
+    stop("rsp_pls: need n.species > n.profiles, more species or less profiles?",
+         call. = FALSE)
+  }
+
+  x.args <- list(...)
+
+  ####################
+  #make sure we only have one species / profile
+  ####################
+  #tidying
+  .pr.cd <- unique(x$PROFILE_CODE)
+  ## .xx <- respeciate:::rsp_tidy_profile(x)
+  .xx <- lapply(.pr.cd, function(y){
+    .x <- x[x$PROFILE_CODE==y,]
+    .x <- rsp_average_profile(.x, y, .x$PROFILE_NAME[1])
+    .x
+  })
+  .xx <- data.table::rbindlist(.xx)
+#############################
+#currently just dropping them
+#can't fit negatives
+  .xx <- .xx[.xx$.value >= 0, ]
+  .xx <- .xx[!is.na(.xx$.value),]
+#############################
+  #should be same! redundant
+  .pr.cd <- unique(.xx$PROFILE_CODE)
+
+  ####################
+  #reduce ref to just species in x
+  ###################
+  #no point to look at any species not in x
+  ref <- subset(ref, SPECIES_ID %in% unique(.xx$SPECIES_ID))
+
+  ###################
+  #nudge
+  ###################
+  #dropping nudge from version 2
+  ##
+  #nb: method was nudge before analysis
+  #and a nudge back after
+  #   nudge(identified.species)->pls->report->nudge back(identified.species)
+
+  #if(!is.null(nudge)){
+  #  for(i in nudge){
+  #    #ref might have both WEIGHT_PERCENT and .value
+  #    ref[ref$SPECIES_NAME==i, "WEIGHT_PERCENT"] <-
+  #      ref[ref$SPECIES_NAME==i, "WEIGHT_PERCENT"] * 10
+  #    .xx[.xx$SPECIES_NAME==i, "WEIGHT_PERCENT"] <-
+  #      .xx[.xx$SPECIES_NAME==i, "WEIGHT_PERCENT"] * 10
+  #    .xx[.xx$SPECIES_NAME==i, ".value"] <-
+  #      .xx[.xx$SPECIES_NAME==i, ".value"] * 10
+  #  }
+  #}
+
+  ##############################
+  #main step/ once per profile
+  ##############################
+  #can we replace this with data.table
+  ans <- lapply(.pr.cd, function(y){
+    .test <- try({
+      #need to try this because it does not always work
+      .x <- as.data.frame(.xx[.xx$PROFILE_CODE==y,])
+      #recode the modelled profile as "test" so it becomes the
+      #response column after the dcast below
+      .x <- rsp_average_profile(.x, "test", "1_test")
+
+      #might not need one of this-and-same-above
+      #might be better doing it here...
+      .tmp <- subset(ref, ref$SPECIES_ID %in% unique(.x$SPECIES_ID))
+
+      #could change this with rbindlist version??
+      .ref <- intersect(names(.x), names(.tmp))
+      .out <- rbind(.x[.ref], .tmp[.ref])
+      .out <- rsp_dcast_profile(.out)
+
+      #build formula and model args
+      .tmp <- names(.out)
+      .tmp <- .tmp[!.tmp %in% c("SPECIES_ID", "SPECIES_NAME", "test")]
+      names(.out)[names(.out) %in% .tmp] <- paste(".m_", names(.out)[names(.out) %in% .tmp],
+                                                  sep="")
+      #zero cases for port function
+      .ls <- paste(".n_", .tmp, sep="")
+      .ls2 <- lapply(.ls, function(x){0})
+      names(.ls2) <- .ls
+      #both .n_ and .m_ terms are back-ticked (as in pls_rebuild) so
+      #profile codes that are not syntactic R names still parse as one term
+      .for <- paste("(`.n_", .tmp, "`*`.m_", .tmp, "`)", sep="", collapse = "+")
+      .for <- as.formula(paste("test~", .for))
+      #NOTE(review): weights are taken before NAs are zeroed below and
+      #test == 0 gives an infinite weight - presumably that fit then
+      #fails and is returned as NULL; confirm this is intended
+      .wt <- 1/.out$test
+      ############################
+      #note
+      ############################
+      #nls wants lower and upper as vectors
+      #but seems to handle lists
+      #  should check how this is done?
+      #  might not translate sensibly...
+      #  pass upper, default INF???
+      #also switch m_[profile] to n_[profile]
+      #  so we have common notation...
+
+      .out[is.na(.out)] <- 0  #testing
+
+      args <- list(formula = .for,
+                   data=.out,
+                   start=.ls2,
+                   lower=.ls2,
+                   weights=.wt,
+                   algorithm="port",
+                   control=nls.control(tol=1e-5))
+      #only ... args that match an entry already in args can overwrite it
+      args <- modifyList(args, x.args[names(x.args) %in% names(args)])
+      args$weights <- args$weights^power
+      x.args <- list(power=power)
+
+      #run nls/pls
+      #####################
+      mod <- do.call(nls, args)
+#      mod <- nls(.for, data=.out,
+#                 weights = (1/.out$test)^power, # think about weighting
+#                 start=.ls2, lower=.ls2,
+#                 algorithm="port",
+#                 control=nls.control(tol=1e-5) #think about tolerance
+#      )
+
+      #if we need to calculate AIC on a case-by-case basis...
+      #for model, I think we need to use stats:::logLik.nls for AIC calc...
+      #see
+      #https://stackoverflow.com/questions/39999456/aic-on-nls-on-r
+      #(currently calculating AIC on the lm model on the overall fit on
+      # all species in all profiles as part of pls_report)
+
+      ###################################
+      #currently all-data stats in pls_report
+      #   and returning list of models
+      ###################################
+      ##.tmp <- summary(mod)$coefficients
+      ##.p.mod <- .tmp[,4]
+      ##names(.p.mod) <- gsub("m_", "p_", names(.p.mod))
+      ##.out <- data.frame(PROFILE_CODE = y,
+      ##                   t(.tmp[,1]),
+      ##                   t(.p.mod))
+      ##.out
+
+      #output list of mod + data
+      ################################
+      #could add args?
+      #   then drop power from pls_ function formals
+      #   or allow as an overwrite only...
+      list(mod=mod,          #model outputs
+           args=args,        #model args
+           x.args=x.args)    #rsp args
+    }, silent = TRUE)
+    #failed fits are returned as NULL (no warning)
+    if(inherits(.test, "try-error")){
+      NULL
+    } else {
+      .test
+    }
+  })
+  names(ans) <- .pr.cd
+
+  #returns the list of nls models
+  #(assuming all viable, one per profile_code in x)
+
+  #testing class options
+  class(ans) <- unique(c("rsp_pls", class(ans)))
+  return(ans)
+
+}
+
+
+#############################
+#############################
+## pls_report
+#############################
+#############################
+
+#' @rdname rsp.pls
+#' @export
+
+## now imports from xxx.r
+## #' @import data.table
+
+# this is the model report table
+# other pls_ functions use output
+# so take care when changing anything...
+
+# to think about
+###############################
+
+# drop intercept from diagnostics model..?
+#   can't decide if it should be there
+#   not in the pls_plot which are based on conventional SA plots...
+
+# calculate the x_[profile] (contributions) in pls_report
+#   currently doing this in several of the pls_plot's
+
+# should the diagnostics be calculated per-species???
+#   if some species very large and some very small
+#   doing them on an all results basis will be overly positive
+
+
+#test
+#devtools::load_all()
+#d1 <- readRDS("C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\test\\my.working.rds")
+#ref <- rsp(c("4868", "4914", "8948", "91155", "91163", "95441", "95529"))
+#mod <- rsp_pls_profile(d1, ref, power=2)
+
+pls_report <- function(pls){
+
+  #summary
+  #  builds one data.frame from all non-NULL models in pls: the model data
+  #  (.m_ columns and test), fitted coefficients (.n_), their p-values
+  #  (.p_), predictions (pred), contributions (.x_ = .n_ * .m_), and
+  #  per-species lm(pred ~ 0 + .value) diagnostics
+  #  returns an empty data.frame if no model is available
+
+  ans <- lapply(names(pls), function(x){
+    .xx <- pls[[x]]
+    if(!is.null(.xx)){
+      .out <- .xx$args$data
+      .tmp <- summary(.xx$mod)$coefficients
+      #names are set from the rownames explicitly; with a one-row
+      #coefficient table (single-term model) [,j] may drop them
+      .n.mod <- .tmp[,1]
+      names(.n.mod) <- rownames(.tmp)
+      .p.mod <- .tmp[,4]
+      #anchored and escaped so only the leading '.n_' prefix is renamed,
+      #not every '<any char>n_' found inside a profile name
+      names(.p.mod) <- sub("^\\.n_", ".p_", rownames(.tmp))
+      .out <- data.frame(PROFILE_CODE = x,
+                         .out,
+                         t(.n.mod),
+                         t(.p.mod),
+                         pred = predict(.xx$mod, newdata=.xx$args$data),
+                         check.names=FALSE)
+      .out
+    } else {
+      NULL
+    }
+  })
+  ans <- data.table::rbindlist(ans, use.names=TRUE, fill=TRUE)
+  if(nrow(ans)==0){
+    return(as.data.frame(ans))
+  }
+
+  #####################
+  #working on
+  #####################
+  # added x_[profile] (.n_[profile] * .m_[profile]) calculations here
+  # was done on fly in older plots...
+  # also changed m_[profile] to n_[profile] and [profile] to m_[profile]
+  # so annotation was consistent with equation in documentation...
+  # must be a better way of doing this...
+
+  .tmp <- names(ans)
+  .tmp <- .tmp[grep("^\\.m_", .tmp)]
+  ans <- as.data.frame(ans)
+  for(i in .tmp){
+    ans[,sub("^\\.m_", ".x_", i)] <- ans[,sub("^\\.m_", ".n_", i)] * ans[,i]
+  }
+  ans <- data.table::as.data.table(ans)
+  ans$.value <- ans$test
+
+  #######################################
+  # previous
+  # as all-species step
+  #######################################
+  ## .mod <- lm(pred ~ 0 + .value, data = .out)
+  ## .out$adj.r.sq <- summary(.mod)$adj.r.squared
+  ## .out$slope <- summary(.mod)$coefficients[1, 1]
+  ## .out$p.slope <- summary(.mod)$coefficients[1, 4]
+  ## .out$AIC <- AIC(.mod)
+  ## .out
+
+  #################################
+  # replacing with...
+  #################################
+  #by-species calculate stats
+  #    guessing this could be done in data.table???
+  .sp.ref <- unique(ans$SPECIES_NAME)
+  .tmp <- lapply(.sp.ref, function(x){
+    .tmp <- subset(ans, SPECIES_NAME==x)
+    #################
+    # note
+    #################
+    # was previously pred ~ .value
+    # and reported intercept and intercept p
+    #
+    .mod <- lm(pred ~ 0 + .value, data = .tmp)
+    ###########
+    #(also noted in rsp_pls_profile)
+    #if we need to calculate aic based on the method parameters...
+    #need to read this:
+    #https://stackoverflow.com/questions/39999456/aic-on-nls-on-r
+    #see stats:::logLik.nls for AIC calc...
+    .s.mod <- suppressWarnings(summary(.mod))
+    ####################
+    #above suppress warnings
+    #  is to hide the perfect fit warning
+    #  you get if you fit a marker...
+    #  option to jitter still there
+    #############
+    data.frame(SPECIES_NAME = x,
+               adj.r.sq = .s.mod$adj.r.squared,
+               slope = .s.mod$coefficients[1, 1],
+               p.slope = .s.mod$coefficients[1, 4],
+               AIC = AIC(.mod)
+    )
+  })
+  .tmp <- data.table::rbindlist(.tmp)
+  ans <- merge(ans, .tmp, by="SPECIES_NAME")
+
+  as.data.frame(ans)
+}
+
+
+
+
+#############################
+#############################
+## pls_test
+#############################
+#############################
+
+#' @rdname rsp.pls
+#' @export
+
+## now imports from xxx.r
+## #' @import data.table
+
+# this is the model tests
+# this builds from pls_report
+
+pls_test <- function(pls){
+  #summary
+  #  returns list(.species, .pls)
+  #  .species: one row per species with the pls_report diagnostics
+  #  .pls: per model term (gp_ columns), 100 * (1 - share of report rows
+  #        with p > 0.05); rows with NA p are kept by x[x>0.05] and so
+  #        count against the term
+  .rp <- pls_report(pls)
+  #species
+  .tmp<- lapply(unique(.rp$SPECIES_NAME), function(i){
+    .ans <- subset(.rp, SPECIES_NAME==i)
+    data.frame(SPECIES_NAME = i,
+               adj.r.sq = .ans$adj.r.sq[1],
+               slope=.ans$slope[1],
+               p.slope=.ans$p.slope[1],
+               AIC = .ans$AIC[1])
+  })
+  .sp <- data.table::rbindlist(.tmp)
+
+  #pls
+  ######################
+  # not sure if we should focus on 'good' or 'bad' p-score here...
+  .pn <- names(.rp)[grepl("^\\.p_", names(.rp))]
+  .ans <- data.table::as.data.table(.rp)[, lapply(.SD,
+                function(x){length(x[x>0.05])/length(x)}),
+            .SDcols = .pn]
+  .ans <- as.data.frame(.ans)
+  .ans <- (1 - .ans)*100
+  names(.ans) <- sub("^\\.p_", "gp_", names(.ans))
+
+  list(.species=.sp,
+       .pls = .ans)
+}
+
+
+
+
+
+
+
+####################################
+####################################
+## pls fitting
+####################################
+####################################
+
+#includes
+#   pls_fit_species and
+#   pls_refit_species
+#   pls_rebuild
+
+
+#' @rdname rsp.pls
+#' @export
+
+pls_fit_species <- function(pls, species, power=1,
+                            refit.profile=TRUE,
+                            as.marker=FALSE,
+                            drop.missing=FALSE,
+                            ...){
+  #wrapper for multiple fits of new data to a pls model
+  #  calls pls_rebuild once per SPECIES_NAME in species, carrying the
+  #  updated pls forward; a failed fit warns and leaves pls unchanged
+  .id <- unique(species$SPECIES_NAME)
+  for(i in .id){
+    .sub.sp <- subset(species, SPECIES_NAME==i)
+    .test <- try(pls_rebuild(pls, species=.sub.sp, power=power,
+                         refit.profile=refit.profile,
+                         as.marker=as.marker,
+                         drop.missing=drop.missing,
+                         ...),
+                 silent=TRUE)
+    if(inherits(.test, "try-error")){
+      #same warning form as pls_refit_species
+      warning("RSP_PLS> failed to fit: ", i,
+              call.=FALSE)
+    } else {
+      pls <- .test
+    }
+  }
+  pls
+}
+
+
+
+#' @rdname rsp.pls
+#' @export
+
+pls_refit_species <- function(pls, species, power=1,
+                            refit.profile=TRUE,
+                            as.marker=FALSE,
+                            drop.missing=FALSE,
+                            ...){
+  #wrapper for multiple refits of species already in a pls model
+  #  species is passed to pls_rebuild one element at a time, carrying the
+  #  updated pls forward; a failed fit warns and leaves pls unchanged
+  .id <- species
+  for(i in .id){
+    .test <- try(pls_rebuild(pls, species=i, power=power,
+                             refit.profile=refit.profile,
+                             as.marker=as.marker,
+                             drop.missing=drop.missing,
+                             ...),
+                 silent=TRUE)
+    #pass back the error???
+    if(inherits(.test, "try-error")){
+      warning("RSP_PLS> failed to fit: ", i,
+              call.=FALSE)
+    } else {
+      pls <- .test
+    }
+  }
+  pls
+}
+
+
+
+#' @rdname rsp.pls
+#' @export
+
+
+#############################
+#this needs a lot of work
+#############################
+
+# pls_fit_species and pls_refit_species
+# are now multiple use wrappers for this...
+# they for loop try(pls_rebuild(...)) + +# (like pls_(re)fit_'s) +# like to drop power from formals +# maybe ignore or pass from previous, but have option to overwrite via ...? + +# need to update the model handling so it is like sp_pls_profile +# this would sort power issue above +# also means the user can change setting themselves +# THINK ABOUT THIS +# they could make a pls that was not positively constrained +# this would also remove the start, lower and upper options +# from the formals... + +# if we are setting start and lower +# start = lower if start is missing might be safer... +# (see code in sp_pls_profile) + +#needs to allow more constraint +# currently not passing forward the args... + +#mod <- readRDS("C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/mod1.RDS") +#dat <- readRDS("C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/uk.metals.aurn.2b.rds") + +#pls_rebuild(mod, subset(dat, SPECIES_NAME=="[avg.AURN] O3"), power=2, as.marker=T) + + +pls_rebuild <- function(pls, species, power=1, + refit.profile=TRUE, + as.marker=FALSE, + drop.missing=FALSE, + ...){ + + x.args <- list(...) + #hiding model args + #also like to hide power + + .out <- pls_report(pls) + + #cheat + ######################### + .cheat <- character() + .cheat2 <- character() + + ######################### + #standardise inputs + ######################### + if(is.character(species)){ + #assuming this is SPECIES_NAME of the species to be fit + #and species was in modelled data when pls was built... + if(!species[1] %in% .out$SPECIES_NAME){ + stop("RSP_PLS> 'species' not in PLS, please check", + call. 
= FALSE) + } + .add <- subset(.out, SPECIES_NAME == species[1]) + .out <- subset(.out, SPECIES_NAME != species[1]) + + } else { + #assuming this is respeciate object/data.frame of right structure + .add <- species + } + + ################################### + #get and check species name and id + ################################### + sp.nm <- unique(.add$SPECIES_NAME) + sp.id <- unique(.add$SPECIES_ID) + #both need to be 1 element + if(length(sp.nm) !=1 || length (sp.id) != 1){ + stop("RSP_PLS> 'species' not unique, either missing or multiple", + call. = FALSE) + } + + #if as.marker is character + # use it as marker profile name and reset as.marker to TRUE + # else use species_name as profile name + # wondering if this should be more unique + if(is.character(as.marker)){ + .mrk.nm <- as.marker + as.marker <- TRUE + } else { + .mrk.nm <- sp.nm + } + + ##################### + #as.marker T/F handling + ##################### + if(as.marker){ + #treat species as marker + for(i in names(pls)){ + if(i %in% unique(.add$PROFILE_CODE) & !is.null(pls[[i]])){ + #remark off all print when happy with method + #print(i) + ######################### + #can simplify a lot below + ######################### + x <- pls[[i]] + .da <- subset(x$args$data, SPECIES_NAME != sp.nm) + .da[.mrk.nm] <- 0 + #.cht <- rev(unique(c("test", rev(names(.da))))) + #.da <- .da[.cht] + .da <- .da[rev(unique(c("test", rev(names(.da)))))] + .mn.df <- .da[1,] + #.mn.df[,1] <- sp.id + #.mn.df[,2] <- sp.nm + .mn.df[,c(1,2)] <- c(sp.id, sp.nm) + .mn.df[,3:(ncol(.da)-2)] <- 0 + ############################## + #below might want to be something other than 1 + # e.g. median other others excluding zero's??? + .mn.df[,ncol(.da)-1] <- 1 + ####################################### + #might need to add a jitter to next??? 
+ ####################################### + #print("hi") + #print(.add) + #print(i) + #print(.add[.add$PROFILE_CODE==i,]) + .mn.df[,ncol(.da)] <- .add[.add$PROFILE_CODE==i, ".value"] + if(!is.na(.mn.df[,ncol(.da)])){ + + + ################################## + #a lot below needs more generalising + ################################### + pls[[i]]$args$data <- rbind(.da, .mn.df) + pls[[i]]$args$weights <- (1/pls[[i]]$args$data$test)^power + if(any(!grepl(.mrk.nm, pls[[i]]$args$formula))){ + #update formula + .for <- as.character(pls[[i]]$args$formula) + .for[3] <- paste(.for[3], "+ (`.m_", .mrk.nm, + "` * `.n_", .mrk.nm, "`)", + sep="") + pls[[i]]$args$formula <- as.formula(paste(.for[2], .for[1], + .for[3], sep="")) + } + if("start" %in% names(pls[[i]]$args)){ + if(!paste(".n_", .mrk.nm, sep="") %in% names(pls[[i]]$args$start)){ + #print("adding .n_ start") + .arg <- pls[[i]]$args$start + .arg[[paste(".m_", .mrk.nm, sep="")]] <-0 + pls[[i]]$args$start <- .arg + } + } + if("lower" %in% names(pls[[i]]$args)){ + if(!paste(".n_", .mrk.nm, sep="") %in% names(pls[[i]]$args$lower)){ + #print("adding .n_ lower") + .arg <- pls[[i]]$args$lower + .arg[[paste(".n_", .mrk.nm, sep="")]] <-0 + pls[[i]]$args$lower <- .arg + } + } + if("upper" %in% names(pls[[i]]$args)){ + if(!paste(".n_", .mrk.nm, sep="") %in% names(pls[[i]]$args$upper)){ + #print("adding .n_ upper") + .arg <- pls[[i]]$args$upper + .arg[[paste(".n_", .mrk.nm, sep="")]] <- Inf + pls[[i]]$args$upper <- .arg + } + } + + #print(pls[[i]]$args$data) + #print(pls[[i]]$args$formula) + #print(pls[[i]]$args$weights) + ###################### + #nls model do.call might need a try wrapper + ######################## + .cheat2 <- c(.cheat2, i) + + pls[[i]]$mod <- do.call(nls, pls[[i]]$args) + } #stop it trying entry is NA + } else { + #can't build this model update, so drop it! + #either no marker or no previous model + ############################# + #might want to change this to + #leave them alone??? 
+ # just might never get the o3 profile included + # or make the as.marker = FALSE drop the case it + # can't model...? + if(drop.missing){ + .cheat <- c(.cheat, i) + pls[i] <- list(NULL) + } + } + } + #print("doing these [mrk]") + #print(.cheat2) + #print("dropping these [mrk]") + #print(.cheat) + } else { + ###################################### + #species not a marker + ###################################### + #distribute across existing sources + ###################################### + + ############################### + #remark prints when happy with method + ############################### + + ######################### + #like to first better way of doing following + ######################### + + #need to build a unique data set of previous m matrix predictions + ################## + #.test <- .out[.out$pred>0,] + # (had to exclude pred = 0 because these were not yet modelled) + #.out <- subset(.out, SPECIES_ID == unique(.test$SPECIES_ID)[1]) + # (replacing with following because above dropped models if first species + # was missing from those profile_code) + # + .test <- .out[.out$pred>0,] + .out <- .test[!duplicated(.test$PROFILE_CODE),] + + .test <- c("PROFILE_CODE", ".value", "WEIGHT_PERCENT") + .test <- names(.add)[names(.add) %in% .test] + .data <- .add[.test] + + names(.data)[2] <- "refit" + .data <- merge(.out, .data[c(1:2)]) + + ######################### + #note + #if checking .data species may not be unique + # just after a unique (all profile_code) m matrix + # for the added + #print(.data) + + .ms <- names(.data)[grepl("^.n_", names(.data))] + .for <- paste("(`", .ms, "`*`", gsub("^.n_", ".m_", .ms), "`)", + sep="", collapse = "+") + .for <- as.formula(paste("refit~", .for)) + + .ns <- .ms + names(.ns) <- gsub("^.n_", ".m_", .ms) + + #note + ################## + #model handling temp update + #lower, start and upper + lower <- if("lower" %in% names(x.args)){ + x.args$lower + } else { + 0 + } + start <- if("start" %in% names(x.args)){ + 
x.args$start + } else { + lower + } + upper <- if("upper" %in% names(x.args)){ + x.args$upper + } else { + Inf + } + .ls <- lapply(.ns, function(x){start}) + .ls2 <- lapply(.ns, function(x){lower}) + .ls3 <- lapply(.ns, function(x){upper}) + + control <- if("control" %in% names(x.args)){ + x.args$control + } else { + nls.control(tol=1e-5) + } + + #print(.data) + #print(.for) + + mod <- nls(.for, data=.data, + #weights = (1/.out$test)^power, + #no weighting currently because species are all the same here! + start=.ls, + lower=.ls2, + upper=.ls3, + algorithm="port", + control=control #think about tolerance + ) + #check.names TRUE was applying make.names + # so turned off when building data.frames for pls model outputs + .ans <- data.frame( + PROFILE_CODE = .data$PROFILE_CODE, + SPECIES_ID = .add$SPECIES_ID[1], + SPECIES_NAME = .add$SPECIES_NAME[1], + t(coefficients(mod)), + test = .data$refit, + check.names=FALSE + ) + names(.ans) <- gsub("^.n_", "", names(.ans)) + + #print("doing these") + #print(.ans$PROFILE_CODE) + + #for each build model, put new models in pls + ################################### + #need to move this to working directly from models + for(i in unique(.ans$PROFILE_CODE)){ + .ii <- subset(.ans, PROFILE_CODE==i) + .ii <- .ii[names(.ii) != "PROFILE_CODE"] + .nn <- pls[[i]]$args$data + .nn <- subset(.nn, !SPECIES_NAME %in% unique(.ii$SPECIES_NAME)) + ########### + #cheat + ############# + #print(.nn) + #print(.ii) + .ii <- .ii[names(.ii) %in% names(.nn)] + ######################## + + pls[[i]]$args$data <- rbind(.nn, .ii) + #rebuild model + .for <- as.character(formula(pls[[i]]$mod)) + .for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) + .ms <- names(pls[[i]]$args$data) + .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")] + .ls <- lapply(.ms, function(x){0}) + names(.ls) <- paste(".n_", .ms, sep="") + .da <- pls[[i]]$args$data + pls[[i]]$args$weights <- (1/pls[[i]]$args$data$test)^power + pls[[i]]$args$control <- control + 
################# + #can these go now..? + ################# + if("start" %in% names(pls[[i]]$args)){ + if(!paste(".n_", .mrk.nm, sep="") %in% names(pls[[i]]$args$start)){ + #print("adding .n_ start") + .arg <- pls[[i]]$args$start + #.arg[[paste(".n_", .mrk.nm, sep="")]] <-0 + pls[[i]]$args$start <- .arg + } + } + if("lower" %in% names(pls[[i]]$args)){ + if(!paste(".n_", .mrk.nm, sep="") %in% names(pls[[i]]$args$lower)){ + #print("adding .n_ lower") + .arg <- pls[[i]]$args$lower + #.arg[[paste("m_", .mrk.nm, sep="")]] <-0 + pls[[i]]$args$lower <- .arg + } + } + if("upper" %in% names(pls[[i]]$args)){ + if(!paste(".n_", .mrk.nm, sep="") %in% names(pls[[i]]$args$upper)){ + #print("adding .n_ upper") + .arg <- pls[[i]]$args$upper + #.arg[[paste(".n_", .mrk.nm, sep="")]] <- Inf + pls[[i]]$args$upper <- .arg + } + } + + } + if(drop.missing){ + ########################################## + #if we are dropping cases were species was + # not available, we need to drop the + # models that were not (re)fit... 
+ #print("dropping these!") + .test <- names(pls)[!names(pls) %in% unique(.ans$PROFILE_CODE)] + #print(.test) + if(length(.test)>0){ + for(i in .test){ + pls[i] <- list(NULL) + } + } + } + } + + ################ + #refit.profiles + ################ + #this might be a little redundant now + + if(refit.profile){ + for(i in names(pls)){ + if(!is.null(pls[[i]])){ + #print(i) + #print(pls[[i]]$args$data) + #print(pls[[i]]$args$formula) + + pls[[i]]$mod <- do.call(nls, pls[[i]]$args) + #pls[[i]]$mod <- nls(.for, data=.da, + # weights = (1/.da$test)^power, # think about weighting + # start=.ls, lower=.ls, + # algorithm="port", + # control=nls.control(tol=1e-5) #think about tolerance + #) + #.for <- as.character(formula(pls[[i]]$mod)) + #.for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) + #.da <- pls[[i]]$args$data + #.ls <- pls[[i]]$args$lower + #print(.da) + #print(.ls) + #print((1/.da$test)^power) + #pls[[i]]$mod <- nls(.for, data=.da, + # weights = (1/.da$test)^power, # think about weighting + # start=.ls, lower=.ls, + # algorithm="port", + # control=nls.control(tol=1e-5) #think about tolerance + #) + #print("refit.profile") + } + } + } + ################ + #output + ################ + pls +} + +################# + + + + +#fix if nulls are an issue +############################ + +#mod3 <- mod3[unlist(lapply(mod3, function(x) !is.null(x)))] + +#test code +#################### + +#inc <- readRDS("C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\_projects\\marylebone03\\.tmp.increment.rds") +#inc$PROFILE_CODE <- as.character(inc$`Start Date`) +#inc$PROFILE_NAME <- as.character(inc$`Start Date`) +#inc <- sp_build_rsp_x(inc, value=".value.inc") + +#sp_match_profile(inc, spq_pm(), matches=20) + +#aa <- sp_profile(c("3157", "4330310", "3941", "4027", "3961")) +#inc.metals <- subset(inc, !grepl("[[]avg.AURN[]]", SPECIES_NAME)) + +#moda <- sp_pls_profile(inc.metals, aa) +#modb <- sp_pls_profile(inc, aa) + +#moda2 <- 
pls_fit_parent(moda, subset(inc, SPECIES_NAME=="[avg.AURN] PM2.5")) + +#moda2i <- pls_fit_species(moda, subset(inc, SPECIES_NAME=="[avg.AURN] PM2.5")) + + + +############################ +#next steps +############################ + +#note + +# this is rebuild version 2 +# first version currently pls_rebuild.old (unexported) + +#tidy code +# go through and tidy messy code +# NB: data.frame names might be getting changed in some functions +# seemed to be happening in multiple refits.... +# looked like make.name(BAD-NAME) +# think about models with missing input +# leave in or drop?? +# or option to do both... ??? +# think about power and other nls arguments +# need to be handling these better... +# currently re-calaculating on rebuild +# BUT might need to be able to work with user input??? +# update the documents + +# have hidden perfect fit error in pls_report +# think that kills it anywhere +# but should check pls_plot... +# also could add a jigger when fitting marker in rebuild? + + + + + diff --git a/R/rsp.pls.plot.R b/R/rsp.pls.plot.R new file mode 100644 index 0000000..f3ff717 --- /dev/null +++ b/R/rsp.pls.plot.R @@ -0,0 +1,1272 @@ +#' @name rsp.pls.plot +#' @title Plots for use with (re)SPECIATE profile Positive Least Squares models +#' @aliases pls.plot pls_plot pls_plot_species pls_plot_profile + +#' @description +#' The \code{pls_plot} functions are intended for use with PLS models built +#' using \code{rsp_pls_profile} (documented separately). They generate some +#' plots commonly used with source apportionment model outputs. + +#' @param pls A \code{sp_pls_profile} output, intended for use with +#' \code{pls_} functions. +#' @param id numeric or character +#' identifying the species or profile to plot. If numeric, these are treated +#' as indices of the species or profile, respectively, in the PLS model; if +#' character, species is treated as the name of species and profile is treated +#' as the profile code. 
#' Both can be concatenated to produce multiple plots and
#' the special case \code{id = -1} is a short cut to all species or profiles,
#' respectively.
#' @param plot.type numeric, the plot type if
#' multiple options are available.
#' @param ... other arguments, typically passed on to the associated
#' \code{lattice} plot.
#' @param log (for \code{pls_plot_profile} only) logical, if \code{TRUE} this
#' applies 'log' scaling to the primary Y axes of the plot.

#########################
# need to check terminology for this...
# The zero handling is based on the offset in plot(..., log="y", off.set)
# but automatically estimated...
# shifted type to plot.type because it conflicts with type in lattice::xyplot....

#' @return \code{pls_plot}s produce various plots commonly used in source
#' apportionment studies.

# GENERAL NOTES

# TO DO

# these all need code tidying

# check individual function notes



####################################
###################################
## pls_plots
###################################
###################################

#these are all draft


###################################
###################################
## pls_plot
###################################
###################################


#' @rdname rsp.pls.plot
#' @export

## this replaces previous pls_plot (now pls_plot.old)

## now imports via data.table::
## need this to kill the as.data.table load message
## #' @import data.table

#test
#devtools::load_all()
#d1 <- readRDS("C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\test\\my.working.rds")
#ref <- rsp(c("4868", "4914", "8948", "91155", "91163", "95441", "95529"))
#mod <- rsp_pls_profile(d1, ref, power=2)
#pls_plot(mod)


pls_plot <- function (pls, plot.type = 1, ...){

  # PLS fit summary plot.
  #
  # For each species in the pls_report() output this draws a stacked
  # horizontal bar of the source contributions (the '.x_' columns), each
  # expressed as sum(contribution) / sum(measurement) over the cases where
  # both are available and non-zero.
  #
  # pls        PLS model (passed to pls_report)
  # plot.type  plot option, only 1 is currently available
  # ...        passed to lattice barchart; 'grid' and 'bars' can be
  #            logical (show/hide) or lists (settings for panel.grid and
  #            panel.abline); 'col' and 'key' are also handled locally
  #
  # Returns (invisibly) the data.frame of plotted proportions.

  #current using lattice/latticeExtra for the panelling/layers...

  ########################
  # to do
  ########################
  # id
  #    no id handling; maybe needs to be in plot type...
  #    maybe also want to do it at end
  #    so missing case locations are retained (if needed for plot)...
  # plot element ordering
  #    currently as it comes...
  #    because *I think* pls_report kills all pre-model handling...

  ############################
  # nags
  ############################
  # pls_plot(..., horizontal=FALSE) errors
  #    should flip x and y...
  # type = 1
  #    in development, simple proportional fit plot, think like
  #    https://latticeextra.r-forge.r-project.org/man/postdoc.html
  #    (but without the 100 percent (proportion=1) limit...)
  # type = 2
  #    to do?

  #################
  #setup
  #################
  .x.args <- list(...)
  dat <- pls_report(pls)
  #not used below yet (no ordering/id handling); call retained from draft
  .ord.pro.c <- .rsp_profile_code_order(dat)
  ######################################
  #option to not do name simplification?
  #
  dat$SPECIES_NAME <- .rsp_tidy_species_name(dat$SPECIES_NAME)
  .sp.ref <- unique(dat$SPECIES_NAME)

  #type
  # (length check so a vector plot.type gives this message rather than
  #  a 'condition has length > 1' error)
  if(length(plot.type) != 1 || !plot.type %in% c(1)){
    stop("pls_plot: plot.type unknown, check ?pls_plot",
         call. = FALSE)
  }

  ############################
  #type 1
  ############################
  if(plot.type==1){
    ##################################
    # note
    # maybe use .rsp_get_prop_from_pls
    #    but check naming change...???
    ##################################
    #dot escaped so only true '.x_' columns are picked up
    .x.nms <- grep("^[.]x_", names(dat), value=TRUE)
    if(length(.x.nms) < 1){
      stop("pls_plot: no source contribution ('.x_') columns to plot",
           call. = FALSE)
    }
    .refs <- c(.x.nms, "pred")
    #make summary pls. prop.table
    .ans <- lapply(.sp.ref, function(x){
      .sp.dat <- subset(dat, SPECIES_NAME==x)
      .d2 <- .sp.dat[1, c("SPECIES_NAME", .refs)]
      for(.ref in .refs){
        #use only paired cases to calculate skew...
        # (zeros are treated as missing)
        .pair <- .sp.dat[c(.ref, ".value")]
        .pair[.pair==0] <- NA
        .pair <- na.omit(.pair)
        .d2[, .ref] <- sum(.pair[,.ref], na.rm=TRUE) /
          sum(.pair[,".value"], na.rm=TRUE)
      }
      .d2
    })
    .ans <- do.call(rbind, .ans)

    #barchart formula
    # names are back-ticked (as in the pls model formulas) so profile
    # codes that are not syntactic R names do not break as.formula
    .for <- paste("`", .x.nms, "`", sep="", collapse="+")
    .for <- as.formula(paste("SPECIES_NAME~", .for, sep=""))
    .labs <- gsub("^[.]x_", "", .x.nms)

    #plot lists
    gr.ls <- list(h=-1, v=-1, col="grey", lty=3)
    if("grid" %in% names(.x.args) && is.list(.x.args$grid)){
      gr.ls <- modifyList(gr.ls, .x.args$grid)
      .x.args$grid <- NULL
    }
    bar.ls <- list(v=c(0.5,1,2), lty=c(3,2,3), col="red")
    if("bars" %in% names(.x.args) && is.list(.x.args$bars)){
      bar.ls <- modifyList(bar.ls, .x.args$bars)
      .x.args$bars <- NULL
    }
    if("col" %in% names(.x.args)){
      #could allow function as col input???
      .cols <- rep(.x.args$col, length.out=length(.labs))
    } else {
      .cols <- rainbow(length(.labs))
      .x.args$col <- .cols
    }
    ky.ls <- list(space="right", text=list(text=.labs),
                  rect=list(col=.cols))
    if("key" %in% names(.x.args) && is.list(.x.args$key)){
      if(any(c("x", "y") %in% names(.x.args$key))){
        #user is positioning the key by x/y, so drop default space
        ky.ls$space <- NULL
      }
      .x.args$key <- modifyList(ky.ls, .x.args$key)
    } else {
      .x.args$key <- ky.ls
    }
    #####################################
    #note
    #maybe use .rsp_panelpal for grid and bars
    #   see pls_plot_profile ???
    #####################################
    pl.ls <- list(x=.for, data=.ans, origin=0, stack=TRUE,
                  grid=TRUE, bars=TRUE, xlim=c(-0.025, NA),
                  xlab="mean(model) / mean(measurements)",
                  panel=function(...){
                    #grid and bars reach the panel as logicals
                    .temp <- list(...)
                    if(.temp$grid){
                      do.call(panel.grid, gr.ls)
                    }
                    panel.barchart(...)
                    if(.temp$bars){
                      do.call(panel.abline, bar.ls)
                    }
                  }
    )
    pl.ls <- modifyList(pl.ls, .x.args)
    p <- do.call(barchart, pl.ls)
  }

  #output
  ############################
  #this needs working up based on input from Dennis...
  plot(p)
  return(invisible(.ans))
}
#' @rdname rsp.pls.plot
#' @export

## now imports from xxx.r
## #' @import data.table

#############################
#this needs a lot of work
#############################

#test
#devtools::load_all()
#d1 <- readRDS("C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\test\\my.working.rds")
#ref <- rsp(c("4868", "4914", "8948", "91155", "91163", "95441", "95529"))
#mod <- rsp_pls_profile(d1, ref, power=2)
#pls_plot_profile(mod)

# log scale may need work
#    but that is in rsp_plot_profile/unexported functions...

pls_plot_profile <- function (pls, plot.type=1, log = FALSE, ...)
{
  # Profile plot for a PLS model.
  #
  # Plots the modelled source profiles (via rsp_plot_profile), one panel
  # per profile, and overlays the total contribution (%) of each profile
  # to each species on a second y axis (latticeExtra doubleYScale).
  #
  # pls        PLS model (passed to pls_report)
  # plot.type  plot option, only 1 is currently available
  # log        passed to rsp_plot_profile as its log argument
  # ...        passed to rsp_plot_profile and, through .rsp_panelPal
  #            ("tc"), to the total contribution layer
  #
  # Returns (invisibly) list(profile, tc), the data behind the two layers.

  #to do
  ##############################
  # log (needs better axes control) but that is in rsp_profile_plot
  # id needs to be enabled...

  #setup
  #############################
  .x.args <- list(...)
  dat <- pls_report(pls)
  #next two not used below yet; retained from draft
  .ord.pro.c <- .rsp_profile_code_order(dat)
  ######################################
  #option to not do name simplification?
  #
  dat$SPECIES_NAME <- .rsp_tidy_species_name(dat$SPECIES_NAME)
  .sp.ref <- unique(dat$SPECIES_NAME)

  #plot.type control
  if(length(plot.type) != 1 || !plot.type %in% c(1)){
    stop("pls_plot_profile: plot.type unknown, check ?pls_plot_profile",
         call. = FALSE)
  }

  ############################
  #type 1
  ############################
  if(plot.type==1){
    #make first plot and output .ans
    ##############################
    #get profiles .m_ columns
    .ans <- .rsp_get_m_from_pls(dat)
    .p1.prof <- unique(.ans$PROFILE_CODE)
    #send to rsp_plot_profile with any user arguments
    #    to make first plot
    # (default col set here; any user col replaces it in modifyList)
    p1.ls <- list(rsp=.ans, layout=c(1, length(.p1.prof)), log=log,
                  multi.profile = "panel", id=seq_along(.p1.prof),
                  order=FALSE, silent=TRUE,
                  col=trellis.par.get("superpose.symbol")$col[1])
    #issue with species_code not being known made this...
    p1.ls <- modifyList(p1.ls, .x.args)
    p1 <- do.call(rsp_plot_profile, p1.ls)

    #make second plot and .ans2
    ######################################
    .ans2 <- .rsp_get_prop_from_pls(dat)
    .ans2$.pc <- .ans2$.prop * 100
    #could do this in the panel so any missing is greyed out ???
    .ans2$.pc[is.na(.ans2$.pc)] <- 0
    #one panel per profile in the data
    # (was fixed at 7, the number of profiles in the test case)
    .n.tc <- length(unique(.ans2$PROFILE_CODE))
    p2.ls <- .rsp_panelPal("tc", list(x =.pc~factor(SPECIES_NAME)|factor(PROFILE_CODE),
                                      data=.ans2,
                                      type=c("h", "p"), pch=18,
                                      layout=c(1, .n.tc),
                                      ylab="Total Contribution (%)",
                                      scales=list(x=list(rot=90))),
                           #note: function is cheat to use .rsp... outside lattice
                           #      could make it the default if no panel set in call???
                           function(...){list(...)}, ...)
    #if tc layer not turned off..
    if(!is.null(p2.ls)){
      if(!"col" %in% names(p2.ls)){
        #maybe need better handling
        p2.ls$col <- trellis.par.get("superpose.symbol")$col[2]
      }
      p2 <- do.call(xyplot, p2.ls)
      p1 <- update(doubleYScale(p1, p2, add.ylab2 = TRUE),
                   par.settings = simpleTheme(col=c(p1.ls$col[1], p2.ls$col[1]))
      )
    }
  }

  #output
  ############################
  #this needs working up based on input from Dennis...
  plot(p1)
  return(invisible(list(profile = .ans, tc = .ans2)))

}
#' @rdname rsp.pls.plot
#' @export

## now imports from xxx.r
## #' @import data.table

#############################
#this needs a lot of work
#############################

#test
#devtools::load_all()
#d1 <- readRDS("C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\test\\my.working.rds")
#ref <- rsp(c("4868", "4914", "8948", "91155", "91163", "95441", "95529"))
#mod <- rsp_pls_profile(d1, ref, power=2)
#pls_plot_species(mod)

# id enabled but
#    species order is always as supplied...
#        probably actually alphabetic
#        look like order(character(unique(PROFILE_CODE)))

# to do
#    .rsp_panelpal handling like other plots
#    layer .mod ???
#    log ??? (not sure it is needed/useful)
#    decide how to reorder or rename species, profiles and x data
#        (do this in plots and data ???)
#    decide how to modify .index

pls_plot_species <- function (pls, id, plot.type=1, ...)
{
  # Species plots for a PLS model.
  #
  # plot.type = 1: model prediction against measurement, one panel per
  #     species, with a y = x reference line and a through-origin lm fit.
  # plot.type = 2: measurement and model prediction as lines against
  #     sample index, one panel per species.
  #
  # pls        PLS model (passed to pls_report)
  # id         species to plot: numeric indices (-1 for all) or species
  #            names; if missing all species are used, limited to the
  #            first 6 (with a warning)
  # plot.type  1 or 2, see above
  # ...        passed to lattice xyplot; 'mod.col' sets the model colour
  #            and, for type 2, 'key.text' sets the key labels
  #
  # Returns (invisibly) the data.frame of plotted data.

  #setup
  #############################
  .x.args <- list(...)

  #plot.type control
  # (checked before any data handling)
  if(length(plot.type) != 1 || !plot.type %in% c(1,2)){
    stop("pls_plot_species: plot.type unknown, check ?pls_plot_species",
         call. = FALSE)
  }

  dat <- pls_report(pls)
  .ord.pro.c <- .rsp_profile_code_order(dat)
  .sp.ref <- unique(dat$SPECIES_NAME)
  #default option (print the lot...)
  my.species <- if (missing(id)) .sp.ref else id
  if (is.numeric(my.species)) {
    my.species <- if (all(my.species == -1)) {
      .sp.ref
    } else {
      .sp.ref[my.species]
    }
  }
  if(length(my.species)>6 && missing(id)){
    #to think about
    #    option to turn off warning???
    #       (using in older versions of code)
    warning("RSP/PLS> ", length(my.species), " species... ",
            "just showing first 6 to reduce plot clutter",
            "\n\t (maybe use id to force larger range if sure)",
            call.=FALSE)
    my.species <- my.species[1:6]
  }

  ######################################
  #option to not do name simplification?
  #
  dat$SPECIES_NAME <- .rsp_tidy_species_name(dat$SPECIES_NAME)
  .sp.ref <- unique(dat$SPECIES_NAME)
  my.species <- .rsp_tidy_species_name(my.species)

  if (!any(my.species %in% .sp.ref)) {
    stop("pls_plot_species> unknown species, please check", call. = FALSE)
  }

  #build PROFILE_CODE/SPECIES_NAME table of .value and pred
  # cast then melt so every profile/species combination has a row
  # and any duplicated cases are averaged
  .cast_melt <- function(.col){
    .d <- data.table::as.data.table(dat[c("SPECIES_NAME", "PROFILE_CODE", .col)])
    .d <- data.table::dcast(.d, PROFILE_CODE ~ SPECIES_NAME,
                            fun.aggregate = mean,
                            na.rm=TRUE,
                            value.var = .col)
    data.table::melt(.d, id.vars="PROFILE_CODE",
                     variable.name="SPECIES_NAME",
                     value.name=.col)
  }
  .tmp <- data.table::merge.data.table(.cast_melt(".value"),
                                       .cast_melt("pred"))
  .tmp <- as.data.frame(.tmp)
  .tmp$.index <- as.numeric(factor(.tmp$PROFILE_CODE, levels=.ord.pro.c,
                                   ordered = TRUE))
  .tmp <- .tmp[order(.tmp$.index),]
  .tmp <- subset(.tmp, SPECIES_NAME %in% my.species)

  ############################
  #type 1
  ############################
  if(plot.type==1){
    .mc <- if ("mod.col" %in% names(.x.args)) {
      .x.args$mod.col
    } else {
      "red"
    }
    plt <- list(x=pred~.value | SPECIES_NAME, data=.tmp,
                #prepanel forces x and y lims to same range
                prepanel=function(...){
                  .lims <- prepanel.default.xyplot(...)
                  .lims$xlim <- range(c(.lims$xlim, .lims$ylim))
                  .lims$ylim <- .lims$xlim
                  .lims
                },
                panel= function(x, y, xlim, ylim, ...){
                  #user control of grid - like loa...
                  .rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3),
                                lattice::panel.grid, ...)
                  #TO DO
                  #user control of y=x
                  panel.ablineq(a = 0, b = 1, adj = c(0,1),
                                col.line ="grey", lty=2, label="")
                  #user control of main plotted data via
                  #  standard lattice
                  panel.xyplot(x=x, y=y, ...)
                  #CURRENTLY JUST col via mod.col
                  #user control of model
                  panel.ablineq(lm(y ~ x + 0), cex = 0.8,
                                x = min(c(x, y), na.rm=TRUE),
                                y = max(c(x, y), na.rm=TRUE),
                                r.squared = TRUE, adj = c(0,1),
                                sep = " (", sep.end = ")",
                                offset=0, varStyle = NULL,
                                col.line = .mc, col.text = .mc, digits = 2)
                },
                xlab="Measurement", ylab="model",
                scales=list(y=list(relation="free",
                                   rot=90),
                            x=list(relation="free")))
    plt <- modifyList(plt, .x.args)
    p <- do.call(xyplot, plt)
  }

  ############################
  #type 2
  ############################
  if(plot.type==2){
    #xlab
    if(!"xlab" %in% names(.x.args)){
      .x.args$xlab <- "Sample [index]"
    }
    #ylab
    # a two element ylab also supplies the key text (if not set)
    if(!"ylab" %in% names(.x.args)){
      .x.args$ylab <- "Measurement, Model"
    } else {
      if(length(.x.args$ylab)>1){
        if(!"key.text" %in% names(.x.args)){
          .x.args$key.text <- .x.args$ylab[1:2]
        }
        .x.args$ylab <- paste(.x.args$ylab[1], .x.args$ylab[2], sep =", ")
      }
    }
    if(!"key.text" %in% names(.x.args)){
      .x.args$key.text <- c("Measurement", "Model")
    }
    if(!"col" %in% names(.x.args)){
      .x.args$col <- trellis.par.get("superpose.line")$col[1:2]
    }
    if("mod.col" %in% names(.x.args)){
      .x.args$col <- c(.x.args$col[1], .x.args$mod.col)
    }
    #scales named in full so a user scales argument updates this
    # default rather than being sent alongside it
    p2.ls <- list(x= .value + pred ~ .index | SPECIES_NAME, data=.tmp,
                  auto.key = list(text=.x.args$key.text,
                                  space="top", columns=2),
                  type="l",
                  panel= function(...){
                    .rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3),
                                  panel.grid, ...)
                    lattice::panel.xyplot(...)
                  },
                  scales=list(relation="free"),
                  par.settings = simpleTheme(col=.x.args$col))
    p2.ls <- modifyList(p2.ls, .x.args)
    p <- do.call(xyplot, p2.ls)
    #plot is drawn once, at output below
  }

  #output
  ############################
  #this needs working up based on input from Dennis...
  plot(p)
  return(invisible(.tmp))

}
######################################
######################################
## unexported
######################################
######################################

#old
#   holding until new versions are fully up and running...

## pls_plot.old

############################
# currently not exporting...
############################

#############################
#this needs a lot of work
#############################

# this uses unexported rsp_profile_pie function below...
#     both pls_plot and rsp_profile_pie need work...


pls_plot.old <- function (pls, n, plot.type = 1, ...){

  # Previous (unexported) version of pls_plot.
  #
  # plot.type 1: per-species stacked bars of source contributions by
  #     sample index, with the measurement overlaid as a line.
  # plot.type 2: per-species panels drawn with .rsp_panel.pie.
  # (plot.type is tested with %in%, so c(1, 2) draws both.)
  #
  # pls        PLS model (passed to pls_report)
  # n          species to plot: numeric indices (-1 for all) or species
  #            names; all species if missing
  # ...        only 'col' is used, one colour per source profile
  #
  # Returns (invisibly) the data.frame of plotted data.

  #current using lattice/latticeExtra for the panelling/layers...

  #basic plots finished but...
  #    currently not passing arguments generally
  #    the par setting seem to be dropped when using plot(p)
  #         ahead of end of function

  ############################
  # nags
  ############################
  # type = 1
  #    not sure about the layer naming
  #    zero is not bottom of barchart...
  # type = 2
  #    the label positioning is messy (see note about nudge)
  #    cex setting too small if only one panel...
  #    wondering about
  #    https://latticeextra.r-forge.r-project.org/man/postdoc.html
  #    as an alternative to type=2
  #    (but 100 percent measured rather than proportion...)

  #################
  #setup
  #################
  .x.args <- list(...)
  dat <- pls_report(pls)
  .ord.pro.c <- .rsp_profile_code_order(dat)
  #dat$SPECIES_NAME <- .rsp_tidy_species_name(dat$SPECIES_NAME)
  .sp.ref <- unique(dat$SPECIES_NAME)
  #species
  #  now defaulting to all plots
  species <- if (missing(n)) .sp.ref else n
  if (is.numeric(species)) {
    species <- if (all(species == -1)) {
      .sp.ref
    } else {
      .sp.ref[species]
    }
  }
  if (!any(species %in% .sp.ref)) {
    stop("RSP_PLS> unknown species, please check", call. = FALSE)
  }
  ################################
  #note:
  # could condition here BUT currently
  # holding on to everything until just before plot
  # might not need to do this....
  #################################

  #####################################
  #messy at moment...
  # source profiles identified from the '.n_' columns
  .sp.n.pro <- grep("^[.]n_", names(dat), value = TRUE)
  .sp.pro <- gsub("^[.]n_", "", .sp.n.pro)

  #line col....
  .col <- lattice::trellis.par.get("superpose.line")$col[1]

  #bar cols
  #could include if you supply a function..?
  #could use col.regions?
  .cols <- if ("col" %in% names(.x.args)) {
    .x.args$col
  } else {
    heat.colors(n = length(.sp.n.pro))
  }
  if (length(.cols) != length(.sp.n.pro)) {
    stop("pls_plot> halted; expecting ", length(.sp.n.pro),
         " colours; given ", length(.cols), call. = FALSE)
  }

  ######################
  # build x_[profile]
  ######################
  #now done in pls_report
  for (i in .sp.pro) {
    dat[, paste(".x_", i, sep = "")] <- dat[, paste(".m_", i, sep = "")] *
      dat[, paste(".n_", i, sep = "")]
  }
  .sp.x.pro <- grep("^[.]x_", names(dat), value = TRUE)
  .rep <- dat[c("SPECIES_NAME", "SPECIES_ID", "PROFILE_CODE",
                .sp.x.pro)]
  .rep <- data.table::melt(data.table::as.data.table(.rep),
                           id.vars = c("SPECIES_ID", "SPECIES_NAME",
                                       "PROFILE_CODE"))

  ###########################
  # now plotting as panels
  ###########################

  ######################################################
  # now using rsp_ function to track all pls model cases
  # previous method only tracked valid cases for the plotted data
  # so no gaps where models dropped/not built...
  #########################################################
  .rep$.index <- as.numeric(factor(.rep$PROFILE_CODE, levels = .ord.pro.c,
                                   ordered = TRUE))
  dat$.index <- as.numeric(factor(dat$PROFILE_CODE, levels = .ord.pro.c,
                                  ordered = TRUE))

  .tmp <- dat[c("SPECIES_ID", "PROFILE_CODE", ".index", ".value", "pred")]
  .rep <- data.table::merge.data.table(.rep, .tmp)

  #strip the '.x_' prefix so series are labelled by profile
  # (previous pattern "^x_" could not match the '.x_' names)
  .rep$variable <- gsub("^[.]x_", "", .rep$variable)

  .rep <- subset(as.data.frame(.rep), SPECIES_NAME %in% species)

  if (1 %in% plot.type) {

    #lattice sets panel order based on factor levels or sorted names
    # so y limits are built in that order
    .sp <- if(is.factor(.rep$SPECIES_NAME)){
      levels(.rep$SPECIES_NAME)
    } else {
      sort(unique(.rep$SPECIES_NAME))
    }
    .sp <- .sp[.sp %in% .rep$SPECIES_NAME]
    .y.scale <- lapply(.sp, function(x){
      .sp.rep <- .rep[.rep$SPECIES_NAME==x,]
      c(0, max(c(.sp.rep$.value, .sp.rep$pred), na.rm=TRUE))
    })
    ###############################################
    #use loa method to generalise this?
    ###############################################

    p2 <- lattice::xyplot(.value ~ .index | SPECIES_NAME, .rep,
                          panel=lattice::panel.xyplot,
                          type="s", xlab="Sample [index]",
                          ylab="Measurement",
                          scales=list(relation="free"),
                          ylim=.y.scale)

    p <- lattice::barchart(value ~ factor(.index) | SPECIES_NAME, .rep,
                           groups=.rep$variable, stack=TRUE,
                           panel=function(x, y, col, groups, ..., subscripts){
                             #grid control like loa
                             .rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3),
                                           lattice::panel.grid, ...)
                             lattice::panel.barchart(x=x, y=y, col=col,
                                                     groups=groups,
                                                     subscripts=subscripts, ...)
                             #measurement line over the bars
                             .y <- .rep$.value[subscripts]
                             #col needs to be from option[1]
                             lattice::panel.xyplot(x=x, y=.y,
                                                   col=.col,
                                                   type="l",
                                                   subscripts=subscripts,...)
                           },
                           scales=list(relation="free"),
                           ylim=.y.scale,
                           col=.cols,
                           border=NA,
                           auto.key=list(space="top", columns = 3,
                                         cex=0.8,
                                         points=FALSE,
                                         rectangles=TRUE))
    plot(update(latticeExtra::doubleYScale(p2, p, add.axis = FALSE),
                par.settings = list(superpose.polygon = list(col = .cols),
                                    superpose.symbol = list(fill = .cols))))
  }
  if (2 %in% plot.type) {

    p <- lattice::xyplot(value ~ .index | SPECIES_NAME, .rep,
                         groups=.rep$variable,
                         totals=.rep$.value,
                         scales=list(relation="free",
                                     draw=FALSE),
                         ylab="", xlab="",
                         col = .cols,
                         auto.key=list(space="top", columns = 3,
                                       cex=0.8,
                                       points=FALSE,
                                       rectangles=TRUE),
                         ylim=c(-2,2), xlim=c(-2,2),
                         between = list(x = 0.2, y = 0.2),
                         panel=.rsp_panel.pie,
                         par.settings = list(superpose.polygon = list(col = .cols),
                                             axis.line = list(col = 'transparent'),
                                             superpose.symbol = list(fill = .cols))
    )
    plot(p)

  }
  invisible(.rep)
}
########################
#currently not exporting
########################


pls_plot_profile.old <- function (pls, n, log = FALSE, ...)
{
  # Previous (unexported) version of pls_plot_profile.
  #
  # Plots source loadings (.m_ * .n_, summed by species) as bars, one
  # panel per profile, with the percent contribution of each profile to
  # each species as a second y axis layer.
  #
  # pls   PLS model (passed to pls_report)
  # n     profiles to plot: numeric indices (-1 for all) or profile
  #       names; all profiles if missing
  # log   if TRUE, log10 y scale for the loadings
  # ...   passed to lattice barchart; also handled locally:
  #       y2 = "pred" (percent of prediction rather than measurement),
  #       mod = FALSE (no contribution layer), mod.col and other
  #       mod.[arg] (settings for the contribution layer)
  #
  # Returns (invisibly) the data.frame of plotted data.

  #########################
  #previous plot used base r graphics
  #this moved to lattice/latticeExtra
  #so we can panel outputs
  #########################

  #setup
  .x.args <- list(...)
  dat <- pls_report(pls)
  .sp.ord <- unique(dat$SPECIES_ID)
  .sp.m.pro <- grep("^[.]m_", names(dat), value = TRUE)
  .sp.pro <- gsub("^[.]m_", "", .sp.m.pro)
  #defaulting n to all profiles as one plot
  profile <- if (missing(n)) .sp.pro else n
  if (is.numeric(profile)) {
    profile <- if (all(profile == -1)) {
      .sp.pro
    } else {
      .sp.pro[profile]
    }
  }
  if (!any(profile %in% .sp.pro)) {
    stop("RSP_PLS> unknown profile(s), please check", call. = FALSE)
  }

  #########################
  #build x_[profile]
  #########################
  n_profile <- paste(".n_", profile, sep = "")
  m_profile <- paste(".m_", profile, sep = "")
  dat <- dat[c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE",
               n_profile, m_profile, "pred", ".value")]
  for (i in profile) {
    dat[, paste(".x_", i, sep = "")] <- dat[, paste(".m_", i, sep = "")] *
      dat[, paste(".n_", i, sep = "")]
  }

  #species totals
  .rep <- data.table::as.data.table(dat)
  .cols <- c(".value", "pred", paste(".x_", profile, sep = ""))
  .rep <- .rep[, lapply(.SD, function(x) sum(x, na.rm = TRUE)),
               .SDcols = .cols, by = c("SPECIES_ID", "SPECIES_NAME")]
  .rep <- as.data.frame(.rep)

  #########################
  # y2 setup
  #########################
  # by default this is percent of .value
  # but might want percent of mod prediction
  .pc.ref <- if ("y2" %in% names(.x.args) && .x.args$y2 == "pred") {
    .rep$pred
  } else {
    .rep$.value
  }
  for (i in profile) {
    .rep[, paste("pc_", i, sep = "")] <-
      (.rep[, paste(".x_", i, sep = "")]/.pc.ref) * 100
  }

  #might not need all of following now we
  #we are not pulling apart to plot one at time...
  # (one row per species, carrying the .m_ columns)
  dat <- dat[!duplicated(dat$SPECIES_NAME), ]
  dat$PROFILE_CODE <- dat$PROFILE_CODE[1]
  dat <- merge(.rep, dat[c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE",
                           m_profile)])
  dat <- dat[order(ordered(dat$SPECIES_ID, levels = .sp.ord)), ]

  ################################
  # build pc_[profile]
  ################################
  rownames(dat) <- seq_len(nrow(dat))
  #anchored so only the 'pc_' columns made above are used
  .ref <- grep("^pc_", names(dat), value = TRUE)
  .x.ref <- gsub("^pc_", ".x_", .ref)
  .oth <- c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE", ".value", "pred")
  .temp <- data.table::as.data.table(dat[c(.oth, .x.ref)])
  .d1 <- data.table::melt(.temp, measure.vars = .x.ref,
                          variable.name = "pls_profile", value.name = "loading")
  .temp <- data.table::as.data.table(dat[c(.oth, .ref)])
  .d2 <- data.table::melt(.temp, measure.vars = .ref,
                          variable.name = "pls_profile", value.name = "percent_contr")
  #same profile labels in both so they merge
  .d2$pls_profile <- gsub("^pc_", ".x_", .d2$pls_profile)
  dat <- as.data.frame(merge(.d1, .d2, all=TRUE))

  ############################
  #now using lattice to handle logs
  # (previous local log10/ylim handling dropped)
  ############################

  ######################
  #plot
  ######################
  #now using lattice/latticeExtra
  ##
  #think there is more here that can be generalized...
  p1.ls <- list(x = loading~SPECIES_NAME | pls_profile,
                data=dat, ylab="Source Loading",
                panel = function(...){
                  .rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3),
                                panel.grid, ...)
                  panel.barchart(...)
                },
                between=list(y=.2),
                scales=list(x=list(rot=90),
                            y=list(rot=c(0,90),
                                   relation="free")),
                layout=c(1,length(profile)))
  if(log){
    p1.ls$scales$y$log <- 10
    p1.ls$yscale.components <- .rsp_yscale.component.log10
  }
  p1.ls <- modifyList(p1.ls, .x.args)
  if(!"col" %in% names(p1.ls)){
    p1.ls$col <- trellis.par.get("superpose.line")$col[1]
  }
  p1 <- do.call(barchart, p1.ls)
  if("mod" %in% names(.x.args) && !.x.args$mod){
    #if mod FALSE just plot 1
    plot(p1)
  } else {
    #add mod layer (total contributions) as y2
    .col2 <- if("mod.col" %in% names(.x.args)){
      .x.args$mod.col
    } else {
      trellis.par.get("superpose.line")$col[2]
    }
    p2.ls <- list(x = percent_contr ~ factor(SPECIES_NAME) | pls_profile,
                  pch=16, type=c("h", "p"), col= c(.col2, .col2),
                  ylab="Total Contribution (%)",
                  data=dat)
    #mod.[arg] arguments are passed to the layer as [arg]
    .mod.args <- .x.args[grepl("^mod[.]", names(.x.args))]
    if(length(.mod.args)>0){
      names(.mod.args) <- gsub("^mod[.]", "", names(.mod.args))
      p2.ls <- modifyList(p2.ls, .mod.args)
    }
    p2 <- do.call(xyplot, p2.ls)
    plot(update(doubleYScale(p1, p2, add.ylab2 = TRUE),
                par.settings = simpleTheme(col=c(p1.ls$col[1], .col2))))
  }

  ############
  #output
  ############
  #could pass plot and data as list???
  return(invisible(dat))
}
+ p1.ls <- list(x = loading~SPECIES_NAME | pls_profile, + data=dat, ylab="Source Loading", + panel = function(...){ + .rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), + panel.grid, ...) + panel.barchart(...) + }, + between=list(y=.2), + scales=list(x=list(rot=90), + y=list(rot=c(0,90), + relation="free")), + layout=c(1,length(profile))) + if(log){ + p1.ls$scales$y$log=10 + p1.ls$yscale.components <- .rsp_yscale.component.log10 + } + p1.ls <- modifyList(p1.ls, .x.args) + if(!"col" %in% names(p1.ls)){ + p1.ls$col <- trellis.par.get("superpose.line")$col[1] + } + p1 <- do.call(barchart, p1.ls) + if("mod" %in% names(.x.args) && !.x.args$mod){ + #if mod FALSE just plot 1 + plot(p1) + } else { + #add mod layer (total contributions) as y2 + .col2 <- if("mod.col" %in% names(.x.args)){ + .x.args$mod.col + } else { + trellis.par.get("superpose.line")$col[2] + } + p2.ls <- list(x = percent_contr ~ factor(SPECIES_NAME) | pls_profile, + pch=16, type=c("h", "p"), col= c(.col2, .col2), + ylab="Total Contribution (%)", + data=dat) + .tmp <- .x.args[grepl("^mod[.]", names(.x.args))] + if(length(.tmp)>0){ + names(.tmp) <- gsub("^mod[.]", "", names(.tmp)) + p2.ls <- modifyList(p2.ls, .tmp) + } + p2 <- do.call(xyplot, p2.ls) + plot(update(doubleYScale(p1, p2, add.ylab2 = TRUE), + par.settings = simpleTheme(col=c(p1.ls$col[1], .col2)))) + } + + ############ + #output + ############ + #could pass plot and data as list??? + return(invisible(dat)) +} + + + +## #' @rdname rsp.pls.plot +## #' @export + +## now imports from xxx.r +## #' @import data.table + +############################# +#this needs a lot of work +############################# + +#currently not exporting + +pls_plot_species.old <- function (pls, id, plot.type = 1, ...) +{ + + #not including NAs.... + + ########################### + # setup + ########################### + .x.args <- list(...) 
+ dat <- pls_report(pls) + .ord.pro.c <- .rsp_profile_code_order(dat) + .sp.ref <- unique(dat$SPECIES_NAME) + species <- if (missing(id)) { + .sp.ref + #default option (print the lot...) + ############################ + #possibly a warning if lots of species to plot + ################## + } else { + id + } + if (is.numeric(species)) { + if (all(species == -1)) { + species <- .sp.ref + } + else { + species <- .sp.ref[species] + } + } + if (!any(species %in% .sp.ref)) { + stop("RSP_PLS> unknown species, please check", call. = FALSE) + } + ############################ + #if not earlier, then here? + #possibly a warning if lots of species to plot + ################## + + ######################### + #could drop a lot of this?? + ######################### + .xlb <- if ("xlab" %in% names(.x.args)) { + .x.args$xlab + } else { + "Measurement" + } + .ylb <- if ("ylab" %in% names(.x.args)) { + .x.args$ylab + } else { + "Model" + } + .bc <- if ("col" %in% names(.x.args)) { + .x.args$col + } else { + par("col") + } + .mc <- if ("mod.col" %in% names(.x.args)) { + .x.args$mod.col + } else { + "red" + } + dat <- subset(dat, SPECIES_NAME %in% species) + # lims <- range(c(d2$.value, d2$pred), na.rm = TRUE, finite = TRUE) + # mod <- lm(pred ~ 0 + .value, d2) + # .sum <- paste("y = ", signif(summary(mod)$coefficients[1, + # 1], 3), "x (adj.R2 = ", signif(summary(mod)$adj.r.squared, + # 3), ")", sep = "") + .lims <- lapply(species, function(x){ + .d <- subset(dat, SPECIES_NAME==x) + range(c(.d$pred, .d$.value), finite=TRUE, na.rm=TRUE) + }) + if (1 %in% plot.type) { + .mc <- if ("mod.col" %in% names(.x.args)) { + .x.args$mod.col + } else { + "red" + } + p1.ls <- list(x=pred~.value | SPECIES_NAME, data=dat, + #prepanel forces x and y lims to same range + prepanel=function(...){ + .tmp <- prepanel.default.xyplot(...) + .tmp$xlim <- range(c(.tmp$xlim, .tmp$ylim)) + .tmp$ylim <- .tmp$xlim + .tmp + }, + panel= function(x, y, xlim, ylim, ...){ + #user control of grid - like loa... 
+ .rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), + lattice::panel.grid, ...) + #TO DO + #user control of y=x + panel.ablineq(a = 0, b = 1, adj = c(0,1), + col.line ="grey", lty=2, label="") + #user control of main plotted data via + # standard lattice + panel.xyplot(x=x, y=y, ...) + #CURRENTLY JUST col via mod.col + #user control of model + panel.ablineq(lm(y ~ x + 0), cex = 0.8, + x = min(c(x, y), na.rm=TRUE), + y = max(c(x, y), na.rm=TRUE), + r.squared = TRUE, adj = c(0,1), + sep = " (", sep.end = ")", + offset=0, varStyle = NULL, + col.line = .mc, col.text = .mc, digits = 2) + }, + xlab="Measurement", ylab="model", + scales=list(y=list(relation="free", + rot=90), + x=list(relation="free"))) + p1.ls <- modifyList(p1.ls, .x.args) + p <- do.call(xyplot, p1.ls) + plot(p) + + # plot(d2$.value, d2$pred, type = "n", main = i, col = .bc, + # xlab = .xlb, ylab = .ylb, xlim = lims, ylim = lims) + # grid() + # abline(a = 0, b = 1, col = "grey") + # points(d2$.value, d2$pred) + # abline(mod, col = .mc, lty = 2) + # text(lims[1], lims[2], .sum, adj = c(0, 1), cex = 0.75) + } + if (2 %in% plot.type) { + #xlab + if(!"xlab" %in% names(.x.args)){ + .x.args$xlab <- "Sample [index]" + } + if(!"ylab" %in% names(.x.args)){ + .x.args$ylab <- "Measurement, Model" + } else { + if(length(.x.args$ylab)>1){ + if(!"key.text" %in% names(.x.args)){ + .x.args$key.text <- .x.args$ylab[1:2] + } + .x.args$ylab <- paste(.x.args$ylab[1], .x.args$ylab[2], sep =", ") + } + } + if(!"key.text" %in% names(.x.args)){ + .x.args$key.text <- c("Measurement", "Model") + } + if(!"col" %in% names(.x.args)){ + .x.args$col <- trellis.par.get("superpose.line")$col[1:2] + } + if("mod.col" %in% names(.x.args)){ + .x.args$col <- c(.x.args$col[1], .x.args$mod.col) + } + + + #ylab + #can to two terms for + + #if("ylab" %in% names(.x.args)){ + # if(length(.x.args$ylab)>1){ + # if(!"key.text" %in% names(.x.args)){ + # .x.args$key.text <- .x.args$ylab[1:2] + # } + # .x.args$ylab <- paste(.x.args$ylab[1], 
.x.args$ylab[2], sep =", ") + # } else { + # if(!"key.text" %in% names(.x.args)){ + # .x.args$key.text <- c("Measurement", "Model") + # } + # } + #} else { + # if(!"key.text" %in% names(.x.args)){ + # .x.args$key.text <- c("Measurement", "Model") + # } + # .x.args$ylab <- "Measurement, Model" + #} + + + + + ######################### + #previous code + ######################### + #plot(d2$.value, type = "n", main = i, col = .bc, + # ylab = .ylb, xlab = .xlb, ylim = lims) + #lines(d2$.value) + #lines(d2$pred, col = .mc) + ######################## + #using standardised index + #make 'ordered profile codes' at top + # before any subsetting... + # .ord.pro.c <- rsp_profile_code_order(dat) + dat$.index <- as.numeric(factor(dat$PROFILE_CODE, levels=.ord.pro.c, + ordered = TRUE)) + dat<- dat[order(dat$.index),] + p2.ls <- list(x= .value + pred ~ .index | SPECIES_NAME, data=dat, + auto.key = list(text=.x.args$key.text, + space="top", columns=2), + type="l", + panel= function(...){ + .rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), + panel.grid, ...) + lattice::panel.xyplot(...) + }, + scale=list(relation="free"), + par.settings = simpleTheme(col=.x.args$col)) + p2.ls <- modifyList(p2.ls, .x.args) + p <- do.call(xyplot, p2.ls) + plot(p) + ###################### + + # or any with any missing are plot on different x scale + # maybe find longest, take range for that + #xyplot(.value + .pred ~ ) + } + + invisible(dat) +} + + + + + + + + + + + diff --git a/R/rsp.q.R b/R/rsp.q.R new file mode 100644 index 0000000..9bd118c --- /dev/null +++ b/R/rsp.q.R @@ -0,0 +1,95 @@ +#' @name rsp.q +#' @title Quick access to common (re)SPECIATE subsets. +#' @aliases rsp_q rsp_q_gas rsp_q_other rsp_q_pm rsp_q_pm.ae6 rsp_q_pm.ae8 +#' rsp_q_pm.cr1 rsp_q_pm.simplified + +#' @description \code{rsp_q_} functions are quick access wrappers to commonly +#' requested (re)SPECIATE subsets. 
+#' @return \code{rsp_q_} functions typically return a \code{respeciate} +#' \code{data.frame} of the requested profiles. +#' +#' For example: +#' +#' \code{rsp_q_gas()} returns all gaseous profiles in (re)SPECIATE +#' (\code{PROFILE_TYPE == 'GAS'}). +#' +#' \code{rsp_q_pm()} returns all particulate matter (PM) profiles in (re)SPECIATE +#' not classified as a special PM type (\code{PROFILE_TYPE == 'PM'}). +#' +#' The special PM types are subsets of profiles intended for special +#' applications, and these include \code{rsp_q_pm.ae6()} (type \code{PM-AE6}), +#' \code{rsp_q_pm.ae8()} (type \code{PM-AE8}), \code{rsp_q_pm.cr1()} (type +#' \code{PM-CR1}), and \code{rsp_q_pm.simplified()} (type \code{PM-Simplified}). +#' +#' \code{rsp_q_other()} returns all profiles classified as other in (re)SPECIATE +#' (\code{PROFILE_TYPE == 'OTHER'}). +#' + + +############################# +#NOTES +############################ + +# might not be keeping these + +# should be a quicker way of doing this... +# maybe try going to sysdata directly instead of using rsp_profile_info??? +# BUT might not be much of a speed saving... + +#profile types +#GAS, OTHER, PM, PM-AE6, PM-AE8, PM-CR1, PM-Simplified + +# any others worth doing??? 
+ +#' @rdname rsp.q +#' @export + +rsp_q_gas <- function(){ + rsp_profile(rsp_profile_info("gas", by = "profile_type", partial=FALSE)) +} + +#' @rdname rsp.q +#' @export + +rsp_q_other <- function(){ + rsp_profile(rsp_profile_info("other", by = "profile_type", partial=FALSE)) +} + +#' @rdname rsp.q +#' @export + +rsp_q_pm <- function(){ + rsp_profile(rsp_profile_info("pm", by = "profile_type", partial=FALSE)) +} + +#' @rdname rsp.q +#' @export + +rsp_q_pm.ae6 <- function(){ + rsp_profile(rsp_profile_info("pm-ae6", by = "profile_type", partial=FALSE)) +} + +#' @rdname rsp.q +#' @export + +rsp_q_pm.ae8 <- function(){ + rsp_profile(rsp_profile_info("pm-ae8", by = "profile_type", partial=FALSE)) +} + +#' @rdname rsp.q +#' @export + +rsp_q_pm.cr1 <- function(){ + rsp_profile(rsp_profile_info("pm-cr1", by = "profile_type", partial=FALSE)) +} + +#' @rdname rsp.q +#' @export + +rsp_q_pm.simplified <- function(){ + rsp_profile(rsp_profile_info("pm-simplified", by = "profile_type", partial=FALSE)) +} + + + + diff --git a/R/sp.rescale.R b/R/rsp.rescale.R similarity index 92% rename from R/sp.rescale.R rename to R/rsp.rescale.R index 743547e..7ef811e 100644 --- a/R/sp.rescale.R +++ b/R/rsp.rescale.R @@ -1,14 +1,14 @@ -#' @name sp.rescale +#' @name rsp.rescale #' @title (re)SPECIATE profile rescaling functions #' @aliases sp_rescale sp_rescale_profile sp_rescale_species #' @description Functions for rescaling -#' @description \code{sp_rescale} rescales the percentage weight records in +#' @description \code{rsp_rescale} rescales the percentage weight records in #' a supplied (re)SPECIATE profile data set. This can be by profile or species -#' subsets, and \code{sp_rescale_profile} and \code{sp_rescale_species} provide +#' subsets, and \code{rsp_rescale_profile} and \code{rsp_rescale_species} provide #' short-cuts to these options. 
-#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +#' @param rsp A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) #' profiles. #' @param method numeric, the rescaling method to apply: #' 1 \code{x/total(x)}; @@ -38,7 +38,7 @@ #NOTE -#' @rdname sp.rescale +#' @rdname rsp.rescale #' @export ## #' @import data.table # now done in xxx.r @@ -63,13 +63,13 @@ -sp_rescale <- function(x, method = 2, by = "species"){ +rsp_rescale <- function(rsp, method = 2, by = "species"){ ################################# #check x is a respeciate object?? #check it has .value - x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(rsp) #save to return as is.. # thinking about this @@ -197,18 +197,18 @@ sp_rescale <- function(x, method = 2, by = "species"){ } -#' @rdname sp.rescale +#' @rdname rsp.rescale #' @export -sp_rescale_profile <- function(x, method = 1, by ="profile"){ - sp_rescale(x=x, method=method, by=by) +rsp_rescale_profile <- function(rsp, method = 1, by ="profile"){ + rsp_rescale(rsp=rsp, method=method, by=by) } -#' @rdname sp.rescale +#' @rdname rsp.rescale #' @export -sp_rescale_species <- function(x, method = 2, by ="species"){ - sp_rescale(x=x, method=method, by=by) +rsp_rescale_species <- function(rsp, method = 2, by ="species"){ + rsp_rescale(rsp=rsp, method=method, by=by) } @@ -233,13 +233,13 @@ sp_rescale_species <- function(x, method = 2, by ="species"){ # may need to think about additional local scaling # e.g. within in profile [species conc]/[sum of all species concs] -rsp_rescale_species <- function(x, method = 2){ +.rsp_rescale_species <- function(x, method = 2){ ################################# #check x is a respeciate object?? #check it has .value - x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(x) #save to return as is.. 
# thinking about this diff --git a/R/sp.reshape.R b/R/rsp.reshape.R similarity index 72% rename from R/sp.reshape.R rename to R/rsp.reshape.R index 490f4d3..ca0aac5 100644 --- a/R/sp.reshape.R +++ b/R/rsp.reshape.R @@ -1,28 +1,28 @@ -#' @name sp.reshape +#' @name rsp.reshape #' @title (re)SPECIATE profile reshaping functions -#' @aliases sp_dcast sp_dcast_profile sp_dcast_species sp_melt_wide +#' @aliases rsp_dcast rsp_dcast_profile rsp_dcast_species rsp_melt_wide #' @description Functions for reshaping (re)SPECIATE profiles -#' @description \code{sp_dcast} and \code{sp_melt_wide} reshape supplied -#' (re)SPECIATE profile(s). \code{sp_dcast} converts these from their supplied +#' @description \code{rsp_dcast} and \code{rsp_melt_wide} reshape supplied +#' (re)SPECIATE profile(s). \code{rsp_dcast} converts these from their supplied #' long form to a widened form, \code{dcast}ing the data set by either species #' or profiles depending on the \code{widen} setting applied. -#' \code{sp_dcast_profile} and \code{sp_dcast_species} are wrappers for these -#' options. \code{sp_melt_wide} attempts to return a previously widened data +#' \code{rsp_dcast_profile} and \code{rsp_dcast_species} are wrappers for these +#' options. \code{rsp_melt_wide} attempts to return a previously widened data #' set to the original long form. -#' @param x A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) -#' profiles in standard long form or widened form for -#' \code{\link{sp_dcast}} and \code{\link{sp_melt_wide}}, respectively. -#' @param widen character, when widening \code{x} with -#' \code{\link{sp_dcast}}, the data type to \code{dcast}, +#' @param rsp A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +#' profiles in standard long form or widened form using +#' \code{\link{rsp_dcast}} and \code{\link{rsp_melt_wide}}, respectively. 
+#' @param widen character, when widening \code{rsp} with +#' \code{\link{rsp_dcast}}, the data type to \code{dcast}, #' currently \code{'species'} (default) or \code{'profile'}. See Note. #' @param pad logical or character, when \code{melt}ing a previously widened #' data set, should output be re-populated with species and/or profile #' meta-data, discarded when widening. This is currently handled by -#' \code{\link{sp_pad}}. The default \code{TRUE} applies standard settings, +#' \code{\link{rsp_pad}}. The default \code{TRUE} applies standard settings, #' so does not include profile sources reference meta-data. (See -#' \code{\link{sp_pad}} for other options). +#' \code{\link{rsp_pad}} for other options). #' @param drop.nas logical, when \code{melt}ing a previously widened #' data set, should output be stripped of any rows containing empty #' weight/value columns. Because not all profile contains all species, the @@ -30,14 +30,14 @@ #' attempt account for that when working with standard re(SPECIATE) #' profiles. It is, however, sometimes useful to check first, e.g. when #' building profiles yourself. -#' @return \code{sp_dcast} returns the wide form of the supplied -#' \code{respeciate} profile. \code{sp_melt_wide} +#' @return \code{rsp_dcast} returns the wide form of the supplied +#' \code{respeciate} profile. \code{rsp_melt_wide} #' returns the (standard) long form of a previously widened profile. #' @note Conventional long-to-wide reshaping of data, or \code{dcast}ing, can #' be slow and memory inefficient. So, \code{respeciate} uses the #' \code{\link[data.table:dcast]{data.table::dcast}} -#' method. The \code{sp_dcast_species} method, +#' method. 
The \code{rsp_dcast_species} method, #' applied using \code{widen='species'}, is effectively: #' #' \code{dcast(..., PROFILE_CODE+PROFILE_NAME~SPECIES_NAME, value.var="WEIGHT_PERCENT")} @@ -56,8 +56,19 @@ #NOTE -#' @rdname sp.reshape -#' @export +############################# +# these use +############################# +# .rsp_tidy_profile +# data.table::as.data.table +# data.table::dcast +# data.table::melt +# rsp_pad + + + + + ## now imports from xxx.r ## #' @import data.table @@ -74,7 +85,10 @@ #long_to_wide reshape ###################### -sp_dcast <- function(x, widen = "species"){ +#' @rdname rsp.reshape +#' @export + +rsp_dcast <- function(rsp, widen = "species"){ #################### #see ?data.table::dcast for examples @@ -92,7 +106,7 @@ sp_dcast <- function(x, widen = "species"){ #adds .value if missing ## using .value rather the WEIGHT_PERCENT in case rescaled - x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(rsp) #save class cls <- class(x) @@ -134,31 +148,36 @@ sp_dcast <- function(x, widen = "species"){ # could use an output arg??? as.is, data.frame, etc... 
out <- as.data.frame(out) - #class(out) <- cls + if(widen=="species"){ + class(out) <- c("rsp_sw", cls) + } else { + class(out) <- c("rsp_pw", cls) + } out } -#' @rdname sp.reshape +###################################### +# rsp_dcast(..., widen) shortcuts +###################################### + +#' @rdname rsp.reshape #' @export -sp_dcast_profile <- function(x, widen = "profile"){ - sp_dcast(x=x, widen=widen) +rsp_dcast_profile <- function(rsp, widen = "profile"){ + rsp_dcast(rsp=rsp, widen=widen) } - -#' @rdname sp.reshape +#' @rdname rsp.reshape #' @export -sp_dcast_species <- function(x, widen = "species"){ - sp_dcast(x=x, widen=widen) +rsp_dcast_species <- function(rsp, widen = "species"){ + rsp_dcast(rsp=rsp, widen=widen) } -#' @rdname sp.reshape -#' @export ## now imports from xxx.r ## #' @import data.table @@ -179,7 +198,10 @@ sp_dcast_species <- function(x, widen = "species"){ #wide_to_long reshape ###################### -sp_melt_wide <- function(x, pad = TRUE, drop.nas = TRUE){ +#' @rdname rsp.reshape +#' @export + +rsp_melt_wide <- function(rsp, pad = TRUE, drop.nas = TRUE){ #################### #see ?data.table::melt for examples @@ -187,7 +209,7 @@ sp_melt_wide <- function(x, pad = TRUE, drop.nas = TRUE){ #adds .value if missing ## using .value rather the WEIGHT_PERCENT in case rescaled - x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(rsp) #save class cls <- class(x) @@ -202,12 +224,12 @@ sp_melt_wide <- function(x, pad = TRUE, drop.nas = TRUE){ "WEIGHT_PERCENT", ".value") .test <- .test[.test %in% names(xx)] if(length(.test)>2){ - stop("sp_melt_wide halted; x already looks like a long profile.", call.=FALSE) + stop("RSP> melt halted; rsp already looks like a long profile.", call.=FALSE) } .test.sp <- length(grep("PROFILE", .test)) .test.pr <- length(grep("SPECIES", .test)) if(.test.pr>0 & .test.sp>0){ - stop("sp_melt_wide halted; x already looks suspect.", call.=FALSE) + stop("RSP> melt halted; rsp looks suspect.", call.=FALSE) } .long
<- "bad" if(.test.pr>0 & length(.test)==.test.pr){ @@ -219,7 +241,7 @@ sp_melt_wide <- function(x, pad = TRUE, drop.nas = TRUE){ .long <- "SPECIES_NAME" } if(.long=="bad"){ - stop("sp_melt_wide halted; x already looks suspect.", call.=FALSE) + stop("RSP> melt halted; rsp looks suspect.", call.=FALSE) } #should only be species.wide or profile.wide @@ -228,50 +250,51 @@ sp_melt_wide <- function(x, pad = TRUE, drop.nas = TRUE){ out <- data.table::melt(xx, id.vars = .id.vars) names(out)[names(out)=="variable"] <- .long names(out)[names(out)=="value"] <- ".value" + if("SPECIES_ID" %in% names(out)){ + out$SPECIES_ID <- as.character(out$SPECIES_ID) + } + if("SPECIES_NAME" %in% names(out)){ + out$SPECIES_NAME <- as.character(out$SPECIES_NAME) + } + if("PROFILE_CODE" %in% names(out)){ + out$PROFILE_CODE <- as.character(out$PROFILE_CODE) + } #out$WEIGHT_PERCENT <- out$.value #merge if padding ##################### #might not be best way of doing it - #testing sp_pad as an alternative to previous remarked code??? 
- # first need to standardise method, decide where to drop.nas, - # finalise formals, decide best data.table methods, etc + # could pass other args to pad + # might need to think about the .value/WEIGHT_PERCENT handling if(is.logical(pad) && pad){ pad <- "standard" } if(is.character(pad)){ + out <- rsp_pad(out, pad, drop.nas) + #tidy bad profile_name + if(all(is.na(out$PROFILE_NAME)) && "PROFILE_CODE" %in% names(out)){ + out$PROFILE_NAME <- out$PROFILE_CODE + } + #tidy bad species_id + if(all(is.na(out$SPECIES_ID)) && "SPECIES_NAME" %in% names(out)){ + out$SPECIES_ID <- as.character(-as.numeric(factor(out$SPECIES_NAME))) + } - out <- sp_pad(out, pad) - -# PROFILES <- as.data.table(sysdata$PROFILES) -# SPECIES_PROPERTIES <- as.data.table(sysdata$SPECIES_PROPERTIES) -# if(.long=="PROFILE_CODE"){ -# out <- merge(out, PROFILES, by = .long, all.y=FALSE, -# all.x=TRUE, allow.cartesian=TRUE) -# .tmp <- intersect(names(out), names(SPECIES_PROPERTIES)) -# out <- merge(out, SPECIES_PROPERTIES, by = .tmp, all.y=FALSE, -# all.x=TRUE, allow.cartesian=TRUE) -# } else { -# #.long must be "SPECIES_NAME" -# out <- merge(out, SPECIES_PROPERTIES, by = .long, all.y=FALSE, -# all.x=TRUE, allow.cartesian=TRUE) -# .tmp <- intersect(names(out), names(PROFILES)) -# out <- merge(out, PROFILES, by = .tmp, all.y=FALSE, -# all.x=TRUE, allow.cartesian=TRUE) -# } -# #to get weight_percentage etc -# SPECIES <- as.data.table(sysdata$SPECIES) -# .tmp <- intersect(names(out), names(SPECIES)) -# print(.tmp) -# out <- merge(out, SPECIES, by = .tmp, all.y=FALSE, -# all.x=TRUE, allow.cartesian=TRUE) -# } else { -# #not great but... -# #if not padding WEIGHT_PERCENT has to be .value -# out$WEIGHT_PERCENT <- out$.value } + + ################################ + # could tidy structure here?? 
+ ################################ + + # if weight_percent but not .value add .value + # if.value but not weight_percent add .value + # similar for profile_name/code and species_name/id + + # is that done in rsp_build_x ?? + + #drop.nas... if(drop.nas){ if(".value" %in% names(out)){ @@ -284,11 +307,13 @@ sp_melt_wide <- function(x, pad = TRUE, drop.nas = TRUE){ #if so, in else here?? } } - out <- as.data.frame(out) + #output #need to rationalise outputs!!! - rsp_build_respeciate(out) - + #.rsp_build_respeciate(out) + out <- as.data.frame(out) + class(out) <- cls[!cls %in% c("rsp_pw", "rsp_sw")] + out } diff --git a/R/spx.R b/R/rsp.x.R similarity index 69% rename from R/spx.R rename to R/rsp.x.R index 7b9a76c..f6238f8 100644 --- a/R/spx.R +++ b/R/rsp.x.R @@ -1,39 +1,40 @@ -#' @name spx -#' @title spx_ functions for grouping and subsetting -#' @aliases spx_ spx_copy spx_n_alkane spx_btex +#' @name rsp.x +#' @title rsp_x_ functions for grouping and subsetting (re)SPECIATE profiles +#' @aliases rsp_x rsp_x_copy rsp_x_nalkane rsp_x_btex +# still wondering if these should be rsp_cut_... -#' @description \code{spx_} functions generate a vector of assignment -#' terms and can be used to subset or condition a supplied re(SPECIATE) +#' @description \code{rsp_x_} functions generate a vector of assignment +#' terms and can be used to subset or condition a supplied (re)SPECIATE #' \code{data.frame}. #' -#' Most commonly, the \code{spx_} functions accept a single input, a -#' re(SPECIATE) \code{data.frame} and return a logical vector of +#' Most commonly, the \code{rsp_x_} functions accept a single input, a +#' (re)SPECIATE \code{data.frame} and return a logical vector of #' length \code{nrow(x)}, identifying species of interest as #' \code{TRUE}. So, for example, they can be used when #' \code{\link{subset}}ting in the form: #' -#' \code{subset(x, spx_n_alkane(x))} +#' \code{subset(rsp, rsp_x_nalkane(rsp))} #' -#' ... 
to extract just n-alkane records from a \code{respeciate} object -#' \code{x}. +#' ... to extract just n-alkane records from a supplied \code{respeciate} +#' object \code{rsp}. #' -#' However, some accept additional arguments. For example, \code{spx_copy} +#' However, some accept additional arguments. For example, \code{rsp_x_copy} #' also accepts a reference data set, \code{ref}, and a column identifier, -#' \code{by}, and tests \code{x$by \%in\% unique(ref$by)}. +#' \code{by}, and tests \code{rsp$by \%in\% unique(ref$by)}. #' -#' @param x a \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +#' @param rsp a \code{respeciate} object, a \code{data.frame} of (re)SPECIATE #' profiles. -#' @param ref (\code{spx_copy} only) a second \code{respeciate} object, to -#' be used as reference when testing \code{x}. -#' @param by (\code{spx_copy} only) character, the name of the column -#' in \code{ref} to copy when testing \code{x}. -#' @return \code{spx_copy} outputs can be modified but, by default, it +#' @param ref (\code{rsp_x_copy} only) a second \code{respeciate} object, to +#' be used as reference when subsetting (or conditioning) \code{rsp}. +#' @param by (\code{rsp_x_copy} only) character, the name of the column +#' in \code{ref} to copy when subsetting (or conditioning) \code{rsp}. +#' @return \code{rsp_x_copy} outputs can be modified but, by default, it #' identifies all species in the supplied reference data set. #' -#' \code{spx_n_alkane} identifies C1 to C40 n-alkanes. +#' \code{rsp_x_nalkane} identifies (straight chain) C1 to C40 n-alkanes. #' -#' \code{spx_btex} identifies the BTEX group of aromatic hydrocarbons +#' \code{rsp_x_btex} identifies the BTEX group of aromatic hydrocarbons #' (benzene, toluene, ethyl benzene, and M-, O- and P-xylene). ############################# @@ -44,11 +45,10 @@ # but it did not seem to be slowing things down # and other approaches seems likely to get messy # really quick... 
+# tidyverse folks would argue against it... # others to do???? -# the BTEXs - doing/testing - # others to consider??? # PAHs different groups @@ -56,55 +56,70 @@ # elementals??? # monitoring network relevant subsets of species -# special cases??? +#do we need a strategy to rationalize multiple species names +# see rsp_x_nalkane where some species have two names in SPECIATE. -# spx_ref(x, ref, by="") -# where x is respeciate object, ref is a reference -# by is column in ref; case is x$by %in% unique(ref$by) +################################# +# rsp_x_copy +################################# -# could ref also be a vector of terms??? +# identify species in rsp that are in ref(erence) +# special cases??? +# rsp_x_ref(rsp, ref, by="") +# where rsp is respeciate object, ref is a reference +# by is column in ref; case is rsp$by %in% unique(ref$by) +# could ref also be a vector of terms??? -#' @rdname spx +#' @rdname rsp.x #' @export -spx_copy <- function(x, ref = NULL, by="species_id"){ +rsp_x_copy <- function(rsp, ref = NULL, by="species_id"){ #maybe warn??? if(is.null(ref)){ - ref <- x + ref <- rsp } - names(x) <- tolower(names(x)) + + names(rsp) <- tolower(names(rsp)) names(ref) <- tolower(names(ref)) .tmp <- unique(ref[, by]) - x[, by] %in% .tmp + rsp[, by] %in% .tmp } + ##################### -#spx_n_alkanes +#rsp_x_nalkanes ####################### +# identify only the n-alkanes in rsp... #source names # from https://en.wikipedia.org/wiki/List_of_straight-chain_alkanes # (might be duplicates) -# (some not using standard names) + +# some in SPECIATE may not use standard names... +# need to check because I am not sure if standard names are international.. + +# some are just [alkane] rather than n-[alkane] +# but not sure if any are in as both [alkane] and n-[alkane] # could try smiles, molecular formula, cas numbers??? # should be one entry/species if they are unique??? 
#test ## a <- sysdata$SPECIES_PROPERTIES -## b <- subset(a, spx_n_alkane(a)) +## b <- subset(a, rsp_x_nalkane(a)) ## b[order(b$SPEC_MW),] -#' @rdname spx +#' @rdname rsp.x #' @export -spx_n_alkane <- function(x){ +rsp_x_nalkane <- function(rsp){ + #group x by is/isn't n-alkane - tolower(x$SPECIES_NAME) %in% c("methane", #C1 + tolower(rsp$SPECIES_NAME) %in% c("methane", #C1 "ethane", "propane", "n-butane", @@ -152,7 +167,7 @@ spx_n_alkane <- function(x){ ##################### -#spx_n_btex +#rsp_x_btex ####################### # Benzene, toluene, ethylbenzene, 3 xylenes isomers @@ -180,22 +195,23 @@ spx_n_alkane <- function(x){ # if several names for btex, might need to think about how to # sample (CAS? etc), merge and compare??? - - #tests ######################### ## a <- sysdata$SPECIES_PROPERTIES -## b <- subset(a, spx_btex(a)) +## b <- subset(a, rsp_x_btex(a)) ## b[order(b$SPEC_MW),] ## to do -#' @rdname spx +#' @rdname rsp.x #' @export -spx_btex <- function(x){ - #group x by is/isn't btex - tolower(x$SPECIES_NAME) %in% c( +rsp_x_btex <- function(rsp){ + + #identify species that is a btex + #might need to think about mixtures... + # for example all xylenes or all c2 benzenes, etc... + tolower(rsp$SPECIES_NAME) %in% c( #to test/check... "benzene", "toluene", diff --git a/R/sp.R b/R/sp.R deleted file mode 100644 index 51c0027..0000000 --- a/R/sp.R +++ /dev/null @@ -1,452 +0,0 @@ -#' @name sp -#' @title sp_ functions -#' @aliases sp_profile sp_build_rsp_x - - -#' @description sp function to get profiles from the R (re)SPECIATE archive - -#' @description \code{\link{sp_profile}} extracts a -#' SPECIATE profile from the local (re)SPECIATE archive. -#' @param code character, numeric or data.frame, the SPECIATE code -#' of the required profile (EPA SPECIATE identifier PROFILE_CODE). 
This is -#' typically one or concatenated character or numeric entries, but can also -#' be a \code{respeciate} object or similar \code{data.frame} containing -#' the \code{code}s as a named \code{PROFILE_NAME} column. -#' @param ... additional arguments, ignored except by \code{sp_profile} which -#' treats these as additional sources for \code{code}. -#' @param include.refs logical, (for \code{sp_profile} only) include profile -#' reference information when getting the requested profile(s) from the -#' archive, default \code{FALSE}. -#' @param x (for \code{sp_build}s only) A \code{data.frame} or similar (i.e. -#' something that can be converted to a \code{data.frame} using -#' \code{as.data.frame}) to be converted into a \code{respeciate} object for -#' comparison with SPECIATE profiles. -#' @param profile_name,profile_code (for \code{sp_build}s only; -#' \code{character}) The name of the column in \code{x} containing -#' profile name and code, respectively. If not already named according -#' to SPECIATE conventions, at least one of these will need to be assigned. -#' @param species_name,species_id (for \code{sp_build}s only; -#' \code{character}) The name of the column in \code{x} containing -#' species name and id, respectively. If not already named according -#' to SPECIATE conventions, at least one of these will need to be assigned. -#' @param value (for \code{sp_build}s only; \code{character}) The name -#' of the column in \code{x} containing measurement values. If not already -#' named according to SPECIATE conventions, this will need to be assigned. -#' @return \code{sp_profile} returns a object of -#' \code{respeciate} class, a \code{data.frame} containing a -#' (re)SPECIATE profile. -#' -#' \code{sp_build}s attempt to build and return a (re)SPECIATE-like profile -#' that can be compared with with data in re(SPECIATE). 
-#' @note With \code{sp_profile}: -#' -#' The option \code{include.refs} adds profile source reference -#' information to the returned \code{respeciate} data set. The default option -#' is to not include these because some profiles have several associated -#' references and including these replicates records, once per reference. -#' \code{respeciate} code is written to handle this but if you are developing -#' own methods or code and include references in any profile build you may be -#' biasing some analyses in favor of those multiple-reference profile unless -#' you check and account such cases. -#' -#' With \code{sp_build}s: -#' -#' It is particularly IMPORTANT that you use EPA SPECIATE conventions when -#' assign species information if you want to compare your data with SPECIATE -#' profiles. Currently, working on option to improve on this (and very happy -#' to discuss if anyone has ideas), but current best suggestion is: (1) -#' identify the SPECIATE species code for all the species in your data set, -#' and (2) assign these as \code{species_id} when \code{sp_build}ing. The -#' function will then associate the \code{species_name}. -#' -#' @references -#' Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., -#' Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. -#' The development and uses of EPA SPECIATE database. -#' Atmospheric Pollution Research, 1(4), pp.196-206. -#' @examples \dontrun{ -#' x <- sp_profile(c(8833, 8850)) -#' plot(x)} - -#NOTES -####################### - -#to think about -####################### - -#add functions??? - -## sp_build_profile to make a profile locally -## needs profile_name, profile_code -## species_name, species_id -## weight_percent (and possibly .value) - -## sp_import_profile to import a profile from an external source -## extension of above to import data from specific sources -## might be very code intensive..? - -## local function to pad data using database??? 
- -#' @rdname sp -#' @export -## (now importing via xxx.r) -## #' @import data.table - -# may need to set data.table specifically?? -# data.table::as.data.table, etc?? - -##################### -#to think about -##################### -# not sure but I think something in the main build: -# (default; include.refs = FALSE) -# PROFILES>>SPECIES>>SPECIES_PROPERTIES -# (full build; include.refs = TRUE) -# PROFILES>>SPECIES>>SPECIES_PROPERTIES>>PROFILE_REFERENCE>>REFERENCES -# is replicating profiles. -# - -#v 0.2 -# based on previous sp_profile but using data.table -# (0.1 version currently unexported sp_profile.old) - -sp_profile <- function(code, ..., include.refs=FALSE) { - - # code currently handles: - # respeciate.ref, data.frames containing profile_code, - # numerics and characters - - ####################### - #could replace code AND ... with just ...??? - # but would need to think about options - # if any in ... were data.frames - ###################### - .try <- lapply(list(code, ...), function(.code){ - if(is.data.frame(.code) && "PROFILE_CODE" %in% names(.code)){ - .code <- unique(.code$PROFILE_CODE) - } - if(is.numeric(.code)) { - .code <- as.character(.code) - } - if(!is.character(.code)) { - warning("unexpected 'code' object found and ignored", - call.=FALSE) - .code <- NULL - } - .code - }) - code <- do.call(c, .try) - - ################ - #previous.... 
- ################ - #if(is.data.frame(code) && "PROFILE_CODE" %in% names(code)){ - # code <- unique(code$PROFILE_CODE) - #} - #if(is.numeric(code)) code <- as.character(code) - #if(!is.character(code)) { - # stop("unexpected 'code' class", - # call.=FALSE) - #} - - PROFILES <- data.table::as.data.table(sysdata$PROFILES) - SPECIES <- data.table::as.data.table(sysdata$SPECIES) - SPECIES_PROPERTIES <- data.table::as.data.table(sysdata$SPECIES_PROPERTIES) - PROFILE_REFERENCE <- data.table::as.data.table(sysdata$PROFILE_REFERENCE) - REFERENCES <- data.table::as.data.table(sysdata$REFERENCES) - - ########################## - #testing tolower below - # as a fix for code arg case sensitivity - ########################## - # could test replacing some of this with sp_pad??? - # IF sp_pad stays - df <- PROFILES[tolower(PROFILES$PROFILE_CODE) %in% tolower(code),] - df <- merge(df, SPECIES, by = "PROFILE_CODE", all.y=FALSE, all.x=TRUE, - allow.cartesian=TRUE) - df <- merge(df, SPECIES_PROPERTIES, by = "SPECIES_ID", all.y=FALSE, - all.x=TRUE, allow.cartesian=TRUE) - if(include.refs){ - df <- merge(df, PROFILE_REFERENCE, by = "PROFILE_CODE", all.y=FALSE, - all.x=TRUE, allow.cartesian=TRUE) - df <- merge(df, REFERENCES, by = "REF_Code", all.y=FALSE, all.x=TRUE, - allow.cartesian=TRUE) - } - df <- df[order(df$PROFILE_CODE, decreasing = FALSE),] - - #build - #note: currently adding .value in rsp_build_respeciate - # could do it here? - # leaving there for now... because we would - # still have to do it there for self-build or - # imported profiles... 
- df <- rsp_build_respeciate(as.data.frame(df)) - return(df) -} - - - -############################## -# sp_build_rsp_x -############################## - -# notes -############################## - -# sp_build_rsp_x currently converts x as.data.frame(x) -# if tibble is loaded, any tibbles complicate things - -# BUT might want to revisit this because it looked like: -# the data structure was fine but -# print.respeciate was having problems... - -# BUT might be other problems I did not spot - -# BUT be nice if c("respeciate", class("tibble")) could be use... -# to retain the data type history -# and drop back to tibble rather than data.frame.... - - -#' @rdname sp -#' @export - -sp_build_rsp_x <- - function(x, profile_code, profile_name, - species_name, species_id, - value, ...){ - - - # light build for a rsp_x data object - # might need spec_mwt - - ########################### - # current build rules - ########################### - - # must be a data.frame or something that can be converted - # using as.data.frame(x) - - # profile and species columns must be character... - - # profile_name: if not there, if sent in call use, - # else if there use profile_code - # profile_code: if not there, if sent in call use, - # else if there use profile_name - # species_name: if not there, if sent in call use, - # else if there use use species_id to look-up - # if any missing, warn - # species_id: if not there, if sent in call use, - # else if there use species_name to look-up - # if any missing, warn - # .value: if not there, if sent in call use. - # (NEW/TESTING) else if there use WEIGHT_PERCENT - # WEIGHT_PERCENT:if not there, if sent in call use - # else if there use .value to look-up - - # don't build/error if any of these missing and end of build - - # redundant? - # currently not using ... - .x.args <- list(...) - - #adding the as.data.frame because - # code is not planning nicely with Dennis' tibbles - # if tibble is loaded before respeciate... 
- x <- as.data.frame(x) - - #rationalise this?... - # could they be else options when - # check for species and profile columns? - ################################ - # notes - # profile and species columns all need to character - # user could supply any thing and previously - # only applying as.character when making something new... - # else may at start then when making something new... - # (at end did not work for species if building one of species_name - # and species_id from other...) - # also - # do values need to be as.numeric??? - if("PROFILE_NAME" %in% names(x)){ - x$PROFILE_NAME <- as.character(x$PROFILE_NAME) - } - if("PROFILE_CODE" %in% names(x)){ - x$PROFILE_CODE <- as.character(x$PROFILE_CODE) - } - if("SPECIES_NAME" %in% names(x)){ - x$SPECIES_NAME <- as.character(x$SPECIES_NAME) - } - if("SPECIES_ID" %in% names(x)){ - x$SPECIES_ID <- as.character(x$SPECIES_ID) - } - - #if not there and sent in call - - #note: - #current making all BUT values, character class - if(!"PROFILE_NAME" %in% names(x) & (!missing(profile_name))){ - if(!profile_name %in% names(x)){ - stop("sp_build> '", as.character(profile_name)[1], - "' not in 'x'...", sep="", call. = FALSE) - } - x$PROFILE_NAME <- as.character(x[, profile_name]) - } - if(!"PROFILE_CODE" %in% names(x) & (!missing(profile_code))){ - if(!profile_code %in% names(x)){ - stop("sp_build> '", as.character(profile_code)[1], - "' not in 'x'...", sep="", call. = FALSE) - } - x$PROFILE_CODE <- as.character(x[, profile_code]) - } - if(!"SPECIES_NAME" %in% names(x) & (!missing(species_name))){ - if(!species_name %in% names(x)){ - stop("sp_build> '", as.character(species_name)[1], - "' not in 'x'...", sep="", call. = FALSE) - } - x$SPECIES_NAME <- as.character(x[, species_name]) - } - if(!"SPECIES_ID" %in% names(x) & (!missing(species_id))){ - if(!species_id %in% names(x)){ - stop("sp_build> '", as.character(species_id)[1], - "' not in 'x'...", sep="", call. 
= FALSE) - } - x$SPECIES_ID <- as.character(x[, species_id]) - } - if(!".value" %in% names(x)){ - if(missing(value)){ - if("WEIGHT_PERCENT" %in% names(x)){ - x$.value <- x[, "WEIGHT_PERCENT"] - } else { - stop("sp_build> 'value' not found for 'x'...", - sep="", call. = FALSE) - } - } else { - if(!value %in% names(x)){ - stop("sp_build> '", as.character(value)[1], - "' not in 'x'...", sep="", call. = FALSE) - } - } - x$.value <- x[, value] - } - ################# - #old - ################# - #if(!".value" %in% names(x) & (!missing(value))){ - # if(!value %in% names(x)){ - # stop("sp_build> '", as.character(value)[1], - # "' not in 'x'...", sep="", call. = FALSE) - # } - # x$.value <- x[, value] - #} - - #if still not there try to assign using what is there - - if("PROFILE_NAME" %in% names(x) & !"PROFILE_CODE" %in% names(x)){ - x$PROFILE_CODE <- x$PROFILE_NAME - } - if("PROFILE_CODE" %in% names(x) & !"PROFILE_NAME" %in% names(x)){ - x$PROFILE_NAME <- x$PROFILE_CODE - } - test <- c("SPECIES_NAME", "SPECIES_ID")[c("SPECIES_NAME", "SPECIES_ID") - %in% names(x)] - if(length(test)==1){ - #one there, other as look-up - .tmp <- data.table::as.data.table( - sysdata$SPECIES_PROPERTIES[c("SPECIES_NAME", "SPECIES_ID")] - ) - .tmp$SPECIES_NAME <- as.character(.tmp$SPECIES_NAME) - .tmp$SPECIES_ID <- as.character(.tmp$SPECIES_ID) - x <- merge(data.table::as.data.table(x), - data.table::as.data.table(.tmp), - all.x=TRUE, all.y=FALSE, allow.cartesian=TRUE) - x <- as.data.frame(x) - } - if(".value" %in% names(x) & !"WEIGHT_PERCENT" %in% names(x)){ - x$WEIGHT_PERCENT <- x$.value - } - - #test what we have - ######################## - - .test <- c("PROFILE_NAME", "PROFILE_CODE", "SPECIES_NAME", "SPECIES_ID", - ".value", "WEIGHT_PERCENT") - .test <- .test[!.test %in% names(x)] - if(length(.test)>0){ - stop("sp_build> bad data structure, expected column(s) missing/unassigned:\n", - paste(.test, sep="", collapse = ", "), "\n", sep="", call.=FALSE) - } - if(any(is.na(x$SPECIES_ID)) | 
any(is.na(x$SPECIES_NAMES))){ - warning("sp_build> suspect species data, values missing:\n", - "(respeciate needs valid species entries)\n", - sep="", call.=FALSE) - } - - #output - ###################### - - x <- as.data.frame(x) - class(x) <- c("rsp_x", "respeciate", "data.frame") - x - } - - - - - - - - - - - - - -############################# -#unexported & previous code -############################# - -#sp_profile v 0.1 -#now unexported - -rsp_profile.old <- function(code) { - #handle numerics/characters - ####################### - #could replace code with ...??? - ###################### - if(class(code)[1] == "respeciate" && "PROFILE_CODE" %in% names(code)){ - code <- unique(code$PROFILE_CODE) - } - if(is.numeric(code)) code <- as.character(code) - if(!is.character(code)) stop("unexpected code class") - - PROFILES <- sysdata$PROFILES - SPECIES <- sysdata$SPECIES - SPECIES_PROPERTIES <- sysdata$SPECIES_PROPERTIES - PROFILE_REFERENCE <- sysdata$PROFILE_REFERENCE - REFERENCES <- sysdata$REFERENCES - - #handle multiple codes - ############################ - #replace previous lapply with a direct %in% - ## df <- lapply(code, function(x){ - ## df <- PROFILES[PROFILES$PROFILE_CODE == x, ] - ## ... 
- ## }) - ## df <- do.call(rbind, df) - #testing as sp_profile.2 - #faster with data.table - ############################ - df <- PROFILES[PROFILES$PROFILE_CODE %in% code,] - df <- merge(df, SPECIES, by = "PROFILE_CODE", all.y=FALSE, all.x=TRUE) - df <- merge(df, SPECIES_PROPERTIES, by = "SPECIES_ID", all.y=FALSE, all.x=TRUE) - df <- merge(df, PROFILE_REFERENCE, by = "PROFILE_CODE", all.y=FALSE, all.x=TRUE) - df <- merge(df, REFERENCES, by = "REF_Code", all.y=FALSE, all.x=TRUE) - df <- df[order(df$PROFILE_CODE, decreasing = FALSE),] -## }) - #build - df <- rsp_build_respeciate(df) - return(df) -} - - - diff --git a/R/sp.pls.R b/R/sp.pls.R deleted file mode 100644 index af012a5..0000000 --- a/R/sp.pls.R +++ /dev/null @@ -1,2379 +0,0 @@ -#' @name sp.pls -#' @title (re)SPECIATE profile Positive Least Squares -#' @aliases sp_pls_profile pls_report pls_test pls_fit_species -#' pls_refit_species pls_rebuild pls_plot -#' pls_plot_species pls_plot_profile - -#' @description Functions for Positive Least Squares (PSL) fitting of -#' (re)SPECIATE profiles - -#' @description -#' \code{sp_pls_profile} builds PSL models for supplied profile(s) using -#' the \code{\link{nls}} function, the 'port' algorithm and a lower -#' limit of zero for all model outputs to enforce the positive fits. The -#' modeled profiles are typically from an external source, e.g. a -#' measurement campaign, and are fit as a linear additive series of reference -#' profiles, here typically from (re)SPECIATE, to provide a measure of -#' source apportionment based on the assumption that the profiles in the -#' reference set are representative of the mix that make up the modeled -#' sample. The \code{pls_} functions work with \code{sp_pls_profile} -#' outputs, and are intended to be used when refining and analyzing -#' these PLS models. - -#' @param x A \code{respeciate} object, a \code{data.frame} of -#' profiles in standard long form, intended for PLS modelling. 
-#' @param ref A \code{respeciate} object, a \code{data.frame} of -#' profiles also in standard long form, used as the set of candidate -#' source profiles when fitting \code{x}. -#' @param power A numeric, an additional factor to be added to -#' weightings when fitting the PLS model. This is applied in the form -#' \code{weight^power}, and increasing this, increases the relative -#' weighting of the more heavily weighted measurements. Values in the -#' range \code{1 - 2.5} are sometimes helpful. -#' @param ... additional arguments, typically ignored or passed on to -#' \code{\link{nls}}. -#' @param pls A \code{sp_pls_profile} output, only used by \code{pls_} -#' functions. -#' @param species for \code{pls_fit_species}, a data.frame of -#' measurements of an additional species to be fitted to an existing -#' PLS model, or for \code{pls_refit_species} a character vector of the -#' names of species already included in the model to be refit. Both are -#' multiple-\code{species} wrappers for \code{pls_rebuild}, a general-purpose -#' PLS fitter than only handles single \code{species}. -#' @param refit.profile (for \code{pls_fit_species}, \code{pls_refit_species} -#' and \code{pls_rebuild}) logical. When fitting a new \code{species} (or -#' refitted an existing \code{species}), all other species in the reference -#' profiles are held 'as is' and added \code{species} is fit to the source -#' contribution time-series of the previous PLS model. By default, the full PLS -#' model is then refit using the revised \code{ref} source profile to generate -#' a PLS model based on the revised source profiles (i.e., ref + new species -#' or ref + refit species). However, this second step can be omitted using -#' \code{refit.profile=FALSE} if you want to use the supplied \code{species} -#' as an indicator rather than a standard member of the apportionment model. 
-#' @param as.marker for \code{pls_rebuild}, \code{pls_fit_species} and -#' \code{pls_refit_species}, \code{logical}, default \code{FALSE}, when -#' fitting (or refitting) a species, treat it as source marker. -#' @param drop.missing for \code{pls_rebuild}, \code{pls_fit_species} and -#' \code{pls_refit_species}, \code{logical}, default \code{FALSE}, when -#' building or rebuilding a PLS model, discard cases where \code{species} -#' is missing. -#' @param n (for \code{pls_plot}s only) numeric or character -#' identifying the species or profile to plot. If numeric, these are treated -#' as indices of the species or profile, respectively, in the PLS model; if -#' character, species is treated as the name of species and profile is treated -#' as the profile code. Both can be concatenated to produce multiple plots and -#' the special case \code{n = -1} is a short cut to all species or profiles, -#' respectively. -#' @param type (for \code{pls_plot}s only) numeric, the plot type if -#' multiple options are available. -#' @param log (for \code{pls_plot_profile} only) logical, if \code{TRUE} this -#' applies 'log' scaling to the primary Y axes of the plot. - -######################### -# need to check terminology for this... -# The zero handling is a based on offset in plot(..., log="y", off.set) -# but automatically estimated... - -#' @return \code{sp_pls_profile} returns a list of nls models, one per -#' profile/measurement set in \code{x}. The \code{pls_} functions work with -#' these outputs. \code{pls_report} generates a \code{data.frame} of -#' model outputs, and is used of several of the other \code{pls_} -#' functions. \code{pls_fit_species}, \code{pls_refit_species} and -#' \code{pls_fit_parent} return the supplied \code{sp_pls_profile} output, -#' updated on the basis of the \code{pls_} function action. -#' \code{pls_plot}s produce various plots commonly used in source -#' apportionment studies. 
- -#' @note This implementation of PLS applies the following modeling constraints: -#' -#' 1. It generates a model of \code{x} that is positively constrained linear -#' product of the profiles in \code{ref}, so outputs can only be -#' zero or more. Although the model is generated using \code{\link{nls}}, -#' which is a Nonlinear Least Squares (NLS) model, the fitting term applied -#' in this case is linear. -#' -#' 2. The number of species in \code{x} must be more that the number of -#' profiles in \code{ref} to reduce the likelihood of over-fitting. -#' -#' - -# GENERAL NOTES - -# TO DO -# link to CMB as crude form of CMB and reference? - -# these all need code tidying - -# check individual function notes - - -############################ -############################ -## sp_pls_profile -############################ -############################ - -#' @rdname sp.pls -#' @export - -## now importing locally where possible -## data.table::[function] -## #' @import data.table - -#This is version 2 - -#version 1 combined version2 and pls_report -#now separated because it simplified pls model reworking - -#currently keeping the function args -# might not need to do this BUT -# model does not seem to be tracking them when ... - -# check power handling is right - -sp_pls_profile <- function(x, ref, - power = 1, - ...){ - - ################## - #from rough code - ################## - - ######################## - #only allowing profiles < species - if(length(unique(ref$PROFILE_CODE)) >= length(unique(x$SPECIES_ID))){ - stop("sp_pls: #.need species > #.profiles, more species or less profiles?", - call. = FALSE) - } - - x.args <- list(...) 
- - #################### - #make sure we only have one species / profile - #################### - #tidying - .pr.cd <- unique(x$PROFILE_CODE) - ## .xx <- respeciate:::rsp_tidy_profile(x) - .xx <- lapply(.pr.cd, function(y){ - .x <- x[x$PROFILE_CODE==y,] - .x <- sp_average_profile(.x, y, .x$PROFILE_NAME[1]) - .x - }) - .xx <- data.table::rbindlist(.xx) -############################# -#currently just dropping them -#can't fit negatives - .xx <- .xx[.xx$.value >= 0, ] - .xx <- .xx[!is.na(.xx$.value),] -############################# - #should be same! redundant - .pr.cd <- unique(.xx$PROFILE_CODE) - - #################### - #reduce ref to just species in x - ################### - #no point to look at any species not in x - ref <- subset(ref, SPECIES_ID %in% unique(.xx$SPECIES_ID)) - - ################### - #nudge - ################### - #dropping nudge from version 2 - ## - #nb: method was nudge before analysis - #and a nudge back after - # nudge(identified.species)->pls->report->nudge back(identified.species) - - #if(!is.null(nudge)){ - # for(i in nudge){ - # #ref might have both WEIGHT_PERCENT and .value - # ref[ref$SPECIES_NAME==i, "WEIGHT_PERCENT"] <- - # ref[ref$SPECIES_NAME==i, "WEIGHT_PERCENT"] * 10 - # .xx[.xx$SPECIES_NAME==i, "WEIGHT_PERCENT"] <- - # .xx[.xx$SPECIES_NAME==i, "WEIGHT_PERCENT"] * 10 - # .xx[.xx$SPECIES_NAME==i, ".value"] <- - # .xx[.xx$SPECIES_NAME==i, ".value"] * 10 - # } - #} - - ############################## - #main step/ once per profile - ############################## - #can we replace this with data.table - ans <- lapply(.pr.cd, function(y){ - .test <- try({ - #need to try this because it does not always work - .x <- as.data.frame(.xx[.xx$PROFILE_CODE==y,]) - .x <- sp_average_profile(.x, "test", "1_test") - - #might not need one of this-and-same-above - #might be better doing it here... - .tmp <- subset(ref, ref$SPECIES_ID %in% unique(.x$SPECIES_ID)) - - #could change this with rbindlist version?? 
- .ref <- intersect(names(.x), names(.tmp)) - .out <- rbind(.x[.ref], .tmp[.ref]) - .out <- sp_dcast_profile(.out) - - #build formula and model args - .tmp <- names(.out) - .tmp <- .tmp[!.tmp %in% c("SPECIES_ID", "SPECIES_NAME", "test")] - #zero cases for port function - .ls <- paste("m_", .tmp, sep="") - .ls2 <- lapply(.ls, function(x){0}) - names(.ls2) <- .ls - .for <- paste("(m_", .tmp, "*`", .tmp, "`)", sep="", collapse = "+") - .for <- as.formula(paste("test~", .for)) - .wt <- 1/.out$test - ############################ - #note - ############################ - #nls wants lower and upper as vectors - #but seems to handle lists - # should check how this is done? - # might not translate sesnibly... - # pass upper, default INF??? - - .out[is.na(.out)] <- 0 #testing - - args <- list(formula = .for, - data=.out, - start=.ls2, - lower=.ls2, - weights=.wt, - algorithm="port", - control=nls.control(tol=1e-5)) - args <- modifyList(args, x.args[names(x.args) %in% names(args)]) - args$weights <- args$weights^power - x.args <- list(power=power) - - #run nls/pls - ##################### - mod <- do.call(nls, args) -# mod <- nls(.for, data=.out, -# weights = (1/.out$test)^power, # think about weighting -# start=.ls2, lower=.ls2, -# algorithm="port", -# control=nls.control(tol=1e-5) #think about tolerance -# ) - - #if we need to calculate AIC on a case-by-case basis... - #for model, I think we need to use stats:::logLik.nls for AIC calc... 
- #see - #https://stackoverflow.com/questions/39999456/aic-on-nls-on-r - #(currently calculating AIc on the lm model on the overall fit on - # all species in all profiles as part of pls_report) - - ################################### - #currently all-data stats in pls_report - # and returning list of models - ################################### - ##.tmp <- summary(mod)$coefficients - ##.p.mod <- .tmp[,4] - ##names(.p.mod) <- gsub("m_", "p_", names(.p.mod)) - ##.out <- data.frame(PROFILE_CODE = y, - ## t(.tmp[,1]), - ## t(.p.mod)) - ##.out - - #output list of mod + data - ################################ - #could add args? - # then drop power from pls_ function formals - # or allow as an overwrite only... - list(mod=mod, #model outputs - args=args, #model args - x.args=x.args) #rsp args - }, silent = TRUE) - if(class(.test)[1]=="try-error"){ - NULL - } else { - .test - } - }) - names(ans) <- .pr.cd - - #returns the list of nls models - #(assuming all viable, one per profile_code in x) - - #testing class options - class(ans) <- unique(c("rsp_pls", class(ans))) - return(ans) - -} - - -############################# -############################# -## pls_report -############################# -############################# - -#' @rdname sp.pls -#' @export - -## now imports from xxx.r -## #' @import data.table - -# this is the model report table -# other pls_ functions use output -# so take care when changing anything... - -# to think about -############################### - -# drop intercept from diagnostics model..? -# can't decide if it should be there -# not in the pls_plot which are based on conventional SA plots... - -# calculate the x_[profile] (contributions) in pls_report -# currently doing this in several of the pls_plot's - -# should the diagnostics be calculated per-species??? 
-# if some species very large and some very small -# doing them on an all results basis will be overly positive - -pls_report <- function(pls){ - - ans <- lapply(names(pls), function(x){ - .xx <- pls[[x]] - if(!is.null(.xx)){ - .out <- .xx$args$data - .tmp <- summary(.xx$mod)$coefficients - .p.mod <- .tmp[,4] - names(.p.mod) <- gsub("m_", "p_", names(.p.mod)) - .out <- data.frame(PROFILE_CODE = x, - .out, - t(.tmp[,1]), - t(.p.mod), - pred = predict(.xx$mod, newdata=.xx$args$data), - check.names=FALSE) - .out - } else { - NULL - } - }) - ans <- data.table::rbindlist(ans, use.names=TRUE, fill=TRUE) - if(nrow(ans)==0){ - return(as.data.frame(ans)) - } - - ##################### - #thinking about - ##################### - # adding x_[profile] (m_[profile] * profile) calculations here - # currently done on fly in some plots... - - ans$.value <- ans$test - - ####################################### - # previous - # as all-species step - ####################################### - ## .mod <- lm(pred ~ 0 + .value, data = .out) - ## .out$adj.r.sq <- summary(.mod)$adj.r.squared - ## .out$slope <- summary(.mod)$coefficients[1, 1] - ## .out$p.slope <- summary(.mod)$coefficients[1, 4] - ## .out$AIC <- AIC(.mod) - ## .out - - ################################# - # replacing with... - ################################# - #by species calculate stats - # guessing this could be done in data.table??? - .sp.ref <- unique(ans$SPECIES_NAME) - .tmp <- lapply(.sp.ref, function(x){ - .tmp <- subset(ans, SPECIES_NAME==x) - ################# - # note - ################# - # was previouslys pred ~ .value - # and reported intercept and intercept p - # - .mod <- lm(pred ~ 0 + .value, data = .tmp) - ########### - #(also noted in sp_pls_profile) - #if we need to calculate aic based on the method parameters... - #need to read this: - #https://stackoverflow.com/questions/39999456/aic-on-nls-on-r - #see stats:::logLik.nls for AIC calc... 
- .s.mod <- suppressWarnings(summary(.mod)) - #################### - #above suppress warnings - # is to hide the perfect fit warning - # you get if you fit a marker... - # option to jitters still there - ############# - data.frame(SPECIES_NAME = x, - adj.r.sq = .s.mod$adj.r.squared, - slope = .s.mod$coefficients[1, 1], - p.slope = .s.mod$coefficients[1, 4], - AIC = AIC(.mod) - ) - }) - .tmp <- data.table::rbindlist(.tmp) - ans <- merge(ans, .tmp, by="SPECIES_NAME") - - as.data.frame(ans) -} - - - - -############################# -############################# -## pls_test -############################# -############################# - -#' @rdname sp.pls -#' @export - -## now imports from xxx.r -## #' @import data.table - -# this is the model tests -# this builds from pls_report - -pls_test <- function(pls){ - .rp <- pls_report(pls) - #species - .tmp<- lapply(unique(.rp$SPECIES_NAME), function(i){ - .ans <- subset(.rp, SPECIES_NAME==i) - data.frame(SPECIES_NAME = i, - adj.r.sq = .ans$adj.r.sq[1], - slope=.ans$slope[1], - p.slope=.ans$p.slope[1], - AIC = .ans$AIC[1]) - }) - .sp <- data.table::rbindlist(.tmp) - - #ref profiles - .pn <- names(.rp)[grepl("^p_", names(.rp))] - .ans <- data.table::as.data.table(.rp)[, lapply(.SD, - function(x){length(x[x>0.05])/length(x)}), - .SDcols = .pn] - .ans <- as.data.frame(.ans) - .ans <- (1 - .ans)*100 - names(.ans) <- gsub("^p_", "gp_", names(.ans)) - - list(.species=.sp, - .pls = .ans) -} - - - - - - - -#################################### -#################################### -## pls fitting -#################################### -#################################### - -#includes -# pls_fit_species and -# pls_refit_species -# pls_rebuild - - -#' @rdname sp.pls -#' @export - -pls_fit_species <- function(pls, species, power=1, - refit.profile=TRUE, - as.marker=FALSE, - drop.missing=FALSE, - ...){ - #wrapper for multiple fits of new data to a pls model - .id <- unique(species$SPECIES_NAME) - for(i in .id){ - .sub.sp <- 
subset(species, SPECIES_NAME==i) - .test <- try(pls_rebuild(pls, species=.sub.sp, power=power, - refit.profile=refit.profile, - as.marker=as.marker, - drop.missing=drop.missing, - ...), - silent=TRUE) - if(class(.test)[1]=="try-error"){ - warning("RSP_PLS> failed to fit: ", i, sep="") - } else { - pls <- .test - } - } - pls -} - - - -#' @rdname sp.pls -#' @export - -pls_refit_species <- function(pls, species, power=1, - refit.profile=TRUE, - as.marker=FALSE, - drop.missing=FALSE, - ...){ - #wrapper for multiple fits of new data to a pls model - .id <- species - for(i in .id){ - .test <- try(pls_rebuild(pls, species=i, power=power, - refit.profile=refit.profile, - as.marker=as.marker, - drop.missing=drop.missing, - ...), - silent=TRUE) - #pass back the error??? - if(class(.test)[1]=="try-error"){ - warning("RSP_PLS> failed to fit: ", i, sep="", - call.=FALSE) - } else { - pls <- .test - } - } - pls -} - - - -#' @rdname sp.pls -#' @export - - -############################# -#this needs a lot of work -############################# - -# pls_fit_species and pls_refit_species -# are now multiple use wrappers for this... -# they for loop try(pls_rebuild(...)) - -# (like pls_(re)fit_'s) -# like to drop power from formals -# maybe ignore or pass from previous, but have option to overwrite via ...? - -# need to update the model handling so it is like sp_pls_profile -# this would sort power issue above -# also means the user can change setting themselves -# THINK ABOUT THIS -# they could make a pls that was not positively constrained -# this would also remove the start, lower and upper options -# from the formals... - -# if we are setting start and lower -# start = lower if start is missing might be safer... -# (see code in sp_pls_profile) - -#needs to allow more constraint -# currently not passing forward the args... 
- -#mod <- readRDS("C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/mod1.RDS") -#dat <- readRDS("C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate/test/uk.metals.aurn.2b.rds") - -#pls_rebuild(mod, subset(dat, SPECIES_NAME=="[avg.AURN] O3"), power=2, as.marker=T) - - -pls_rebuild <- function(pls, species, power=1, - refit.profile=TRUE, - as.marker=FALSE, - drop.missing=FALSE, - ...){ - - x.args <- list(...) - #hiding model args - #also like to hide power - - .out <- pls_report(pls) - - #cheat - ######################### - .cheat <- character() - .cheat2 <- character() - - ######################### - #standardise inputs - ######################### - if(is.character(species)){ - #assuming this is SPECIES_NAME of the species to be fit - #and species was in modelled data when pls was built... - if(!species[1] %in% .out$SPECIES_NAME){ - stop("RSP_PLS> 'species' not in PLS, please check", - call. = FALSE) - } - .add <- subset(.out, SPECIES_NAME == species[1]) - .out <- subset(.out, SPECIES_NAME != species[1]) - - } else { - #assuming this is respeciate object/data.frame of right structure - .add <- species - } - - ################################### - #get and check species name and id - ################################### - sp.nm <- unique(.add$SPECIES_NAME) - sp.id <- unique(.add$SPECIES_ID) - #both need to be 1 element - if(length(sp.nm) !=1 || length (sp.id) != 1){ - stop("RSP_PLS> 'species' not unique, either missing or multiple", - call. 
= FALSE) - } - - #if as.marker is character - # use it as marker profile name and reset as.marker to TRUE - # else use species_name as profile name - # wondering if this should be more unique - if(is.character(as.marker)){ - .mrk.nm <- as.marker - as.marker <- TRUE - } else { - .mrk.nm <- sp.nm - } - - ##################### - #as.marker T/F handling - ##################### - if(as.marker){ - #treat species as marker - for(i in names(pls)){ - if(i %in% unique(.add$PROFILE_CODE) & !is.null(pls[[i]])){ - #remark off all print when happy with method - #print(i) - ######################### - #can simplify a lot below - ######################### - x <- pls[[i]] - .da <- subset(x$args$data, SPECIES_NAME != sp.nm) - .da[.mrk.nm] <- 0 - #.cht <- rev(unique(c("test", rev(names(.da))))) - #.da <- .da[.cht] - .da <- .da[rev(unique(c("test", rev(names(.da)))))] - .mn.df <- .da[1,] - #.mn.df[,1] <- sp.id - #.mn.df[,2] <- sp.nm - .mn.df[,c(1,2)] <- c(sp.id, sp.nm) - .mn.df[,3:(ncol(.da)-2)] <- 0 - ############################## - #below might want to be something other than 1 - # e.g. median other others excluding zero's??? - .mn.df[,ncol(.da)-1] <- 1 - ####################################### - #might need to add a jitter to next??? 
- ####################################### - #print("hi") - #print(.add) - #print(i) - #print(.add[.add$PROFILE_CODE==i,]) - .mn.df[,ncol(.da)] <- .add[.add$PROFILE_CODE==i, ".value"] - if(!is.na(.mn.df[,ncol(.da)])){ - - - ################################## - #a lot below needs more generalising - ################################### - pls[[i]]$args$data <- rbind(.da, .mn.df) - pls[[i]]$args$weights <- (1/pls[[i]]$args$data$test)^power - if(any(!grepl(.mrk.nm, pls[[i]]$args$formula))){ - #update formula - .for <- as.character(pls[[i]]$args$formula) - .for[3] <- paste(.for[3], "+ (`m_", .mrk.nm, - "` * `", .mrk.nm, "`)", - sep="") - pls[[i]]$args$formula <- as.formula(paste(.for[2], .for[1], - .for[3], sep="")) - } - if("start" %in% names(pls[[i]]$args)){ - if(!paste("m_", .mrk.nm, sep="") %in% names(pls[[i]]$args$start)){ - #print("adding m_ start") - .arg <- pls[[i]]$args$start - .arg[[paste("m_", .mrk.nm, sep="")]] <-0 - pls[[i]]$args$start <- .arg - } - } - if("lower" %in% names(pls[[i]]$args)){ - if(!paste("m_", .mrk.nm, sep="") %in% names(pls[[i]]$args$lower)){ - #print("adding m_ lower") - .arg <- pls[[i]]$args$lower - .arg[[paste("m_", .mrk.nm, sep="")]] <-0 - pls[[i]]$args$lower <- .arg - } - } - if("upper" %in% names(pls[[i]]$args)){ - if(!paste("m_", .mrk.nm, sep="") %in% names(pls[[i]]$args$upper)){ - #print("adding m_ upper") - .arg <- pls[[i]]$args$upper - .arg[[paste("m_", .mrk.nm, sep="")]] <- Inf - pls[[i]]$args$upper <- .arg - } - } - - #print(pls[[i]]$args$data) - #print(pls[[i]]$args$formula) - #print(pls[[i]]$args$weights) - ###################### - #nls model do.call might need a try wrapper - ######################## - .cheat2 <- c(.cheat2, i) - - pls[[i]]$mod <- do.call(nls, pls[[i]]$args) - } #stop it trying entry is NA - } else { - #can't build this model update, so drop it! - #either no marker or no previous model - ############################# - #might want to change this to - #leave them alone??? 
- # just might never get the o3 profile included - # or make the as.marker = FALSE drop the case it - # can't model...? - if(drop.missing){ - .cheat <- c(.cheat, i) - pls[i] <- list(NULL) - } - } - } - #print("doing these [mrk]") - #print(.cheat2) - #print("dropping these [mrk]") - #print(.cheat) - } else { - ###################################### - #species not a marker - ###################################### - #distribute across existing sources - ###################################### - - ############################### - #remark prints when happy with method - ############################### - - ######################### - #like to first better way of doing following - ######################### - - #need to build a unique data set of previous m matrix predictions - ################## - #.test <- .out[.out$pred>0,] - # (had to exclude pred = 0 because these were not yet modelled) - #.out <- subset(.out, SPECIES_ID == unique(.test$SPECIES_ID)[1]) - # (replacing with following because above dropped models if first species - # was missing from those profile_code) - # - .test <- .out[.out$pred>0,] - .out <- .test[!duplicated(.test$PROFILE_CODE),] - - .test <- c("PROFILE_CODE", ".value", "WEIGHT_PERCENT") - .test <- names(.add)[names(.add) %in% .test] - .data <- .add[.test] - - names(.data)[2] <- "refit" - .data <- merge(.out, .data[c(1:2)]) - - ######################### - #note - #if checking .data species may not be unique - # just after a unique (all profile_code) m matrix - # for the added - #print(.data) - - .ms <- names(.data)[grepl("^m_", names(.data))] - .for <- paste("(`", .ms, "`*`", gsub("^m_", "n_", .ms), "`)", - sep="", collapse = "+") - .for <- as.formula(paste("refit~", .for)) - - .ns <- .ms - names(.ns) <- gsub("^m_", "n_", .ms) - - #note - ################## - #model handling temp update - #lower, start and upper - lower <- if("lower" %in% names(x.args)){ - x.args$lower - } else { - 0 - } - start <- if("start" %in% names(x.args)){ - x.args$start - } 
else { - lower - } - upper <- if("upper" %in% names(x.args)){ - x.args$upper - } else { - Inf - } - .ls <- lapply(.ns, function(x){start}) - .ls2 <- lapply(.ns, function(x){lower}) - .ls3 <- lapply(.ns, function(x){upper}) - - control <- if("control" %in% names(x.args)){ - x.args$control - } else { - nls.control(tol=1e-5) - } - - #print(.data) - #print(.for) - - mod <- nls(.for, data=.data, - #weights = (1/.out$test)^power, - #no weighting currently because species are all the same here! - start=.ls, - lower=.ls2, - upper=.ls3, - algorithm="port", - control=control #think about tolerance - ) - #check.names TRUE was applying make.names - # so turned off when building data.frames for pls model outputs - .ans <- data.frame( - PROFILE_CODE = .data$PROFILE_CODE, - SPECIES_ID = .add$SPECIES_ID[1], - SPECIES_NAME = .add$SPECIES_NAME[1], - t(coefficients(mod)), - test = .data$refit, - check.names=FALSE - ) - names(.ans) <- gsub("^n_", "", names(.ans)) - - #print("doing these") - #print(.ans$PROFILE_CODE) - - #for each build model, put new models in pls - ################################### - #need to move this to working directly from models - for(i in unique(.ans$PROFILE_CODE)){ - .ii <- subset(.ans, PROFILE_CODE==i) - .ii <- .ii[names(.ii) != "PROFILE_CODE"] - .nn <- pls[[i]]$args$data - .nn <- subset(.nn, !SPECIES_NAME %in% unique(.ii$SPECIES_NAME)) - ########### - #cheat - ############# - #print(.nn) - #print(.ii) - .ii <- .ii[names(.ii) %in% names(.nn)] - ######################## - - pls[[i]]$args$data <- rbind(.nn, .ii) - #rebuild model - .for <- as.character(formula(pls[[i]]$mod)) - .for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) - .ms <- names(pls[[i]]$args$data) - .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")] - .ls <- lapply(.ms, function(x){0}) - names(.ls) <- paste("m_", .ms, sep="") - .da <- pls[[i]]$args$data - pls[[i]]$args$weights <- (1/pls[[i]]$args$data$test)^power - pls[[i]]$args$control <- control - ################# - #can 
these go now..? - ################# - if("start" %in% names(pls[[i]]$args)){ - if(!paste("m_", .mrk.nm, sep="") %in% names(pls[[i]]$args$start)){ - #print("adding m_ start") - .arg <- pls[[i]]$args$start - #.arg[[paste("m_", .mrk.nm, sep="")]] <-0 - pls[[i]]$args$start <- .arg - } - } - if("lower" %in% names(pls[[i]]$args)){ - if(!paste("m_", .mrk.nm, sep="") %in% names(pls[[i]]$args$lower)){ - #print("adding m_ lower") - .arg <- pls[[i]]$args$lower - #.arg[[paste("m_", .mrk.nm, sep="")]] <-0 - pls[[i]]$args$lower <- .arg - } - } - if("upper" %in% names(pls[[i]]$args)){ - if(!paste("m_", .mrk.nm, sep="") %in% names(pls[[i]]$args$upper)){ - #print("adding m_ upper") - .arg <- pls[[i]]$args$upper - #.arg[[paste("m_", .mrk.nm, sep="")]] <- Inf - pls[[i]]$args$upper <- .arg - } - } - - } - if(drop.missing){ - ########################################## - #if we are dropping cases were species was - # not available, we need to drop the - # models that were not (re)fit... - #print("dropping these!") - .test <- names(pls)[!names(pls) %in% unique(.ans$PROFILE_CODE)] - #print(.test) - if(length(.test)>0){ - for(i in .test){ - pls[i] <- list(NULL) - } - } - } - } - - ################ - #refit.profiles - ################ - #this might be a little redundant now - - if(refit.profile){ - for(i in names(pls)){ - if(!is.null(pls[[i]])){ - #print(i) - #print(pls[[i]]$args$data) - #print(pls[[i]]$args$formula) - - pls[[i]]$mod <- do.call(nls, pls[[i]]$args) - #pls[[i]]$mod <- nls(.for, data=.da, - # weights = (1/.da$test)^power, # think about weighting - # start=.ls, lower=.ls, - # algorithm="port", - # control=nls.control(tol=1e-5) #think about tolerance - #) - #.for <- as.character(formula(pls[[i]]$mod)) - #.for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) - #.da <- pls[[i]]$args$data - #.ls <- pls[[i]]$args$lower - #print(.da) - #print(.ls) - #print((1/.da$test)^power) - #pls[[i]]$mod <- nls(.for, data=.da, - # weights = (1/.da$test)^power, # think about weighting - # 
start=.ls, lower=.ls, - # algorithm="port", - # control=nls.control(tol=1e-5) #think about tolerance - #) - #print("refit.profile") - } - } - } - ################ - #output - ################ - pls -} - -################# - - - - -#fix if nulls are an issue -############################ - -#mod3 <- mod3[unlist(lapply(mod3, function(x) !is.null(x)))] - -#test code -#################### - -#inc <- readRDS("C:\\Users\\trakradmin\\OneDrive - University of Leeds\\Documents\\pkg\\respeciate\\_projects\\marylebone03\\.tmp.increment.rds") -#inc$PROFILE_CODE <- as.character(inc$`Start Date`) -#inc$PROFILE_NAME <- as.character(inc$`Start Date`) -#inc <- sp_build_rsp_x(inc, value=".value.inc") - -#sp_match_profile(inc, spq_pm(), matches=20) - -#aa <- sp_profile(c("3157", "4330310", "3941", "4027", "3961")) -#inc.metals <- subset(inc, !grepl("[[]avg.AURN[]]", SPECIES_NAME)) - -#moda <- sp_pls_profile(inc.metals, aa) -#modb <- sp_pls_profile(inc, aa) - -#moda2 <- pls_fit_parent(moda, subset(inc, SPECIES_NAME=="[avg.AURN] PM2.5")) - -#moda2i <- pls_fit_species(moda, subset(inc, SPECIES_NAME=="[avg.AURN] PM2.5")) - - - -############################ -#next steps -############################ - -#note - -# this is rebuild version 2 -# first version currently pls_rebuild.old (unexported) - -#tidy code -# go through and tidy messy code -# NB: data.frame names might be getting changed in some functions -# seemed to be happening in multiple refits.... -# looked like make.name(BAD-NAME) -# think about models with missing input -# leave in or drop?? -# or option to do both... ??? -# think about power and other nls arguments -# need to be handling these better... -# currently re-calaculating on rebuild -# BUT might need to be able to work with user input??? -# update the documents - -# have hidden perfect fit error in pls_report -# think that kills it anywhere -# but should check pls_plot... -# also could add a jigger when fitting marker in rebuild? 
- - - - - -#################################### -################################### -## pls_plots -################################### -################################### - -#these are all draft - - -#################################### -#################################### -## pls_plot -#################################### -#################################### - - -#' @rdname sp.pls -#' @export - -## now imports via data.table:: -## need this to kill the as.data.table load message -## #' @import data.table -## - -############################# -#this needs a lot of work -############################# - -# this uses unexported rsp_profile_pie function below... -# both pls_plot and rsp_profile_pie need work... - - -pls_plot <- function (pls, n, type = 1, ...){ - - #current using lattice/latticeExtra for the panelling/layers... - - #basic plots finished but... - # currently not passing arguments generally - # the par setting seem to be dropped when using plot(p) - # ahead of end of function - - ############################ - # nags - ############################ - - # type = 1 - ############################ - - # note sure about the layer naming - # zero is not bottom of barchart... - - # type = 2 - ############################ - - # the label positioning is messy (see not about nudge) - - # cex setting too small if only one panel... - - #wondering about - # https://latticeextra.r-forge.r-project.org/man/postdoc.html - # as an alternative to type=2 - # (but 100 percent measured rather than proportion...) - - ################# - #setup - ################# - .x.args <- list(...) 
- dat <- pls_report(pls) - .ord.pro.c <- rsp_profile_code_order(dat) - .sp.ref <- unique(dat$SPECIES_NAME) - #species - # now defaulting to all plots - species <- if (missing(n)) { - species <- .sp.ref - } - else { - n - } - if (is.numeric(species)) { - if (all(species == -1)) { - species <- .sp.ref - } - else { - species <- .sp.ref[species] - } - } - if (!any(species %in% .sp.ref)) { - stop("RSP_PLS> unknown species, please check", call. = FALSE) - } - ################################ - #note: - # could condition here BUT currently - # holding on to everything until just before plot - # might not need to do this.... - ################################# - - .sp.ord <- unique(dat$SPECIES_ID) - .sp.m.pro <- names(dat)[grep("^m_", names(dat))] - .sp.pro <- gsub("^m_", "", .sp.m.pro) - - #line col.... - .col <- lattice::trellis.par.get("superpose.line")$col[1] - - #bar cols - .cols <- if ("col" %in% names(.x.args)) { - #could include if you supply a function..? - #could use col.regions? - .cols <- .x.args$col - } - else { - .cols <- heat.colors(n = length(.sp.m.pro)) - } - if (length(.cols) != length(.sp.m.pro)) { - stop("pls_plot> halted; expecting ", length(.sp.m.pro), - "colours; given ", length(.cols), sep = "", call. 
= FALSE) - } - - ###################### - # build x_[profile] - ###################### - for (i in .sp.pro) { - dat[, paste("x_", i, sep = "")] <- dat[, paste("m_", - i, sep = "")] * dat[, i] - } - .sp.x.pro <- names(dat)[grep("^x_", names(dat))] - .rep <- dat[c("SPECIES_NAME", "SPECIES_ID", "PROFILE_CODE", - .sp.x.pro)] - .rep <- data.table::melt(data.table::as.data.table(.rep), - id = c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE")) - .tot <- data.table::as.data.table(dat) - .cs <- c(".value", "pred", .sp.x.pro) - .tot <- .tot[, lapply(.SD, function(x) sum(x, na.rm = TRUE)), - .SDcols = .cs, by = c("SPECIES_ID", "SPECIES_NAME")] - - ########################### - # now plotting as panels - # using - ########################### - - ###################################################### - # now using rsp_ function to track all pls model cases - # previous method only tracked valid cases for the plotted data - # so no gaps where models dropped/not built... - ######################################################### - .rep$.index <- as.numeric(factor(.rep$PROFILE_CODE, levels = .ord.pro.c, - ordered = TRUE)) - dat$.index <- as.numeric(factor(dat$PROFILE_CODE, levels = .ord.pro.c, - ordered = TRUE)) - .tmp <- dat[c("SPECIES_ID", "PROFILE_CODE", ".index", ".value", "pred")] - .rep <- data.table::merge.data.table(.rep, .tmp) - - .rep$variable <- gsub("^x_", "", .rep$variable) - - #print(names(.rep)) - #return(dat) - - .rep <- subset(as.data.frame(.rep), SPECIES_NAME %in% species) - - if (1 %in% type) { - - #lattice sets panel order based - .sp <- if(is.factor(.rep$SPECIES_NAME)){ - levels(.rep$SPECIES_NAME) - } else { - sort(unique(.rep$SPECIES_NAME)) - } - .sp <- .sp[.sp %in% .rep$SPECIES_NAME] - #.y.scale <- lapply(unique(.rep$SPECIES_NAME), function(x){ - .y.scale <- lapply(.sp, function(x){ - .tmp <- .rep[.rep$SPECIES_NAME==x,] - c(0, max(c(.tmp$.value, .tmp$pred), na.rm=TRUE)) - }) - ############################################### - #use loa method to generalise 
this? - ############################################### - - - p2 <- lattice::xyplot(.value ~ .index | SPECIES_NAME, .rep, - panel=lattice::panel.xyplot, - type="l", xlab="Sample [index]", - ylab="Measurement", - scales=list(relation="free"), - ylim=.y.scale) - - p <- lattice::barchart(value ~ factor(.index) | SPECIES_NAME, .rep, - groups=.rep$variable, stack=TRUE, - panel=function(x, y, col, groups, ..., subscripts){ - #grid control like loa - rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), - lattice::panel.grid, ...) - lattice::panel.barchart(x=x, y=y, col=col, - groups=groups, - subscripts=subscripts, ...) - .y <- .rep$.value[subscripts] - #col needs to be from option[1] - lattice::panel.xyplot(x=x, y=.y, - col=.col, - type="l", - subscripts=subscripts,...) - }, - scales=list(relation="free"), - #auto.key=list(space="top", columns=2, - # col.line=.cols, - # points=FALSE, rectangles=TRUE), - ylim=.y.scale, - col=.cols, - border=NA, - #par.settings = list(superpose.polygon = list(col = .cols, - # pch =c (15, 15)), - #superpose.symbol = list(fill = .cols)), - auto.key=list(space="top", columns = 3, - cex=0.8, - points=FALSE, - rectangles=TRUE)) #, - #xscale.components = function(lim,...){ - # lim <- as.numeric(as.character(lim)) - # ans <- lattice::xscale.components.default(lim=lim,...) 
- # print(ans) - # ans - #}) - plot(update(latticeExtra::doubleYScale(p2, p, add.axis = FALSE), - par.settings = list(superpose.polygon = list(col = .cols), - superpose.symbol = list(fill = .cols)))) - - #p2 <- lattice::xyplot(.value ~ factor(.index) | SPECIES_NAME, dat, - # type="l", scales=list(relation="free")) - #plot(cheat(p, latticeExtra::as.layer(p2))) - - #plot(latticeExtra::doubleYScale(p, p2, add.axis=FALSE, add.ylab2=FALSE)) - } - if (2 %in% type) { - - - p <- lattice::xyplot(value ~ .index | SPECIES_NAME, .rep, - groups=.rep$variable, - totals=.rep$.value, - scales=list(relation="free", - draw=FALSE), - ylab="", xlab="", - col = .cols, - auto.key=list(space="top", columns = 3, - cex=0.8, - points=FALSE, - rectangles=TRUE), - ylim=c(-2,2), xlim=c(-2,2), - between = list(x = 0.2, y = 0.2), - panel=rsp_panel.pie, - par.settings = list(superpose.polygon = list(col = .cols), - axis.line = list(col = 'transparent'), - superpose.symbol = list(fill = .cols)) - ) - plot(p) - - } - invisible(.rep) -} - - - -#test <- "C:/Users/trakradmin/OneDrive - University of Leeds/Documents/pkg/respeciate" -#mod <- readRDS(paste(test, "mod2.RDS", sep="/")) -#pls_plot(mod) - - - - - - -#################################### -#################################### -## pls_plot_species -#################################### -#################################### - - -#' @rdname sp.pls -#' @export - -## now imports from xxx.r -## #' @import data.table - -############################# -#this needs a lot of work -############################# - - -pls_plot_species <- function (pls, n, type = 1, ...) -{ - ########################### - # setup - ########################### - .x.args <- list(...) - dat <- pls_report(pls) - .ord.pro.c <- rsp_profile_code_order(dat) - .sp.ref <- unique(dat$SPECIES_NAME) - species <- if (missing(n)) { - .sp.ref - #default option (print the lot...) 
- ############################ - #possibly a warning if lots of species to plot - ################## - } else { - n - } - if (is.numeric(species)) { - if (all(species == -1)) { - species <- .sp.ref - } - else { - species <- .sp.ref[species] - } - } - if (!any(species %in% .sp.ref)) { - stop("RSP_PLS> unknown species, please check", call. = FALSE) - } - ############################ - #if not earlier, then here? - #possibly a warning if lots of species to plot - ################## - - ######################### - #could drop a lot of this?? - ######################### - .xlb <- if ("xlab" %in% names(.x.args)) { - .x.args$xlab - } else { - "Measurement" - } - .ylb <- if ("ylab" %in% names(.x.args)) { - .x.args$ylab - } else { - "Model" - } - .bc <- if ("col" %in% names(.x.args)) { - .x.args$col - } else { - par("col") - } - .mc <- if ("mod.col" %in% names(.x.args)) { - .x.args$mod.col - } else { - "red" - } - dat <- subset(dat, SPECIES_NAME %in% species) - # lims <- range(c(d2$.value, d2$pred), na.rm = TRUE, finite = TRUE) - # mod <- lm(pred ~ 0 + .value, d2) - # .sum <- paste("y = ", signif(summary(mod)$coefficients[1, - # 1], 3), "x (adj.R2 = ", signif(summary(mod)$adj.r.squared, - # 3), ")", sep = "") - .lims <- lapply(species, function(x){ - .d <- subset(dat, SPECIES_NAME==x) - range(c(.d$pred, .d$.value), finite=TRUE, na.rm=TRUE) - }) - if (1 %in% type) { - p1.ls <- list(x=pred~.value | SPECIES_NAME, data=dat, - #prepanel forces x and y lims to same range - prepanel=function(...){ - .tmp <- prepanel.default.xyplot(...) - .tmp$xlim <- range(c(.tmp$xlim, .tmp$ylim)) - .tmp$ylim <- .tmp$xlim - .tmp - }, - panel= function(x, y, xlim, ylim, ...){ - #user control of grid - like loa... - rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), - lattice::panel.grid, ...) 
- #TO DO - #user control of y=x - panel.ablineq(a = 0, b = 1, adj = c(0,1), - col.line ="grey", lty=2, label="") - #user control of main plotted data via - # standard lattice - panel.xyplot(x=x, y=y, ...) - #CURRENTLY JUST col via mod.col - #user control of model - panel.ablineq(lm(y ~ x + 0), cex = 0.8, - x = min(c(x, y), na.rm=TRUE), - y = max(c(x, y), na.rm=TRUE), - r.squared = TRUE, adj = c(0,1), - sep = " (", sep.end = ")", - offset=0, varStyle = NULL, - col.line = .mc, col.text = .mc, digits = 2) - }, - xlab="Measurement", ylab="model", - scales=list(y=list(relation="free", - rot=90), - x=list(relation="free"))) - p1.ls <- modifyList(p1.ls, .x.args) - p <- do.call(xyplot, p1.ls) - plot(p) - - # plot(d2$.value, d2$pred, type = "n", main = i, col = .bc, - # xlab = .xlb, ylab = .ylb, xlim = lims, ylim = lims) - # grid() - # abline(a = 0, b = 1, col = "grey") - # points(d2$.value, d2$pred) - # abline(mod, col = .mc, lty = 2) - # text(lims[1], lims[2], .sum, adj = c(0, 1), cex = 0.75) - } - if (2 %in% type) { - #xlab - if(!"xlab" %in% names(.x.args)){ - .x.args$xlab <- "Sample [index]" - } - if(!"ylab" %in% names(.x.args)){ - .x.args$ylab <- "Measurement, Model" - } else { - if(length(.x.args$ylab)>1){ - if(!"key.text" %in% names(.x.args)){ - .x.args$key.text <- .x.args$ylab[1:2] - } - .x.args$ylab <- paste(.x.args$ylab[1], .x.args$ylab[2], sep =", ") - } - } - if(!"key.text" %in% names(.x.args)){ - .x.args$key.text <- c("Measurement", "Model") - } - if(!"col" %in% names(.x.args)){ - .x.args$col <- trellis.par.get("superpose.line")$col[1:2] - } - if("mod.col" %in% names(.x.args)){ - .x.args$col <- c(.x.args$col[1], .x.args$mod.col) - } - - - #ylab - #can to two terms for - - #if("ylab" %in% names(.x.args)){ - # if(length(.x.args$ylab)>1){ - # if(!"key.text" %in% names(.x.args)){ - # .x.args$key.text <- .x.args$ylab[1:2] - # } - # .x.args$ylab <- paste(.x.args$ylab[1], .x.args$ylab[2], sep =", ") - # } else { - # if(!"key.text" %in% names(.x.args)){ - # 
.x.args$key.text <- c("Measurement", "Model") - # } - # } - #} else { - # if(!"key.text" %in% names(.x.args)){ - # .x.args$key.text <- c("Measurement", "Model") - # } - # .x.args$ylab <- "Measurement, Model" - #} - - - - - ######################### - #previous code - ######################### - #plot(d2$.value, type = "n", main = i, col = .bc, - # ylab = .ylb, xlab = .xlb, ylim = lims) - #lines(d2$.value) - #lines(d2$pred, col = .mc) - ######################## - #using standardised index - #make 'ordered profile codes' at top - # before any subsetting... - # .ord.pro.c <- rsp_profile_code_order(dat) - dat$.index <- as.numeric(factor(dat$PROFILE_CODE, levels=.ord.pro.c, - ordered = TRUE)) - p2.ls <- list(x= .value + pred ~ .index | SPECIES_NAME, data=dat, - auto.key = list(text=.x.args$key.text, - space="top", columns=2), - type="l", - panel= function(...){ - rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), - panel.grid, ...) - lattice::panel.xyplot(...) - }, - scale=list(relation="free"), - par.settings = simpleTheme(col=.x.args$col)) - p2.ls <- modifyList(p2.ls, .x.args) - p <- do.call(xyplot, p2.ls) - plot(p) - ###################### - - # or any with any missing are plot on different x scale - # maybe find longest, take range for that - #xyplot(.value + .pred ~ ) - } - - invisible(dat) -} - - - -#################################### -#################################### -## pls_plot_profile -#################################### -#################################### - - -#' @rdname sp.pls -#' @export - -## now imports from xxx.r -## #' @import data.table - - -############################# -#this needs a lot of work -############################# - -pls_plot_profile <- function (pls, n, log = FALSE, ...) -{ - ######################### - #previous plot used base r graphics - #this moved to lattice/latticeExtra - #so we can panel outputs - ######################### - - #setup - .x.args <- list(...) 
- .plt.args <- .x.args[names(.x.args %in% c())] - dat <- pls_report(pls) - .sp.ord <- unique(dat$SPECIES_ID) - .sp.m.pro <- names(dat)[grep("^m_", names(dat))] - .sp.pro <- gsub("^m_", "", .sp.m.pro) - #defaulting n to all profiles as one plot - profile <- if (missing(n)) { - profile <- .sp.pro - } else { - n - } - if (is.numeric(profile)) { - if (all(profile == -1)) { - profile <- .sp.pro - } - else { - profile <- .sp.pro[profile] - } - } - if (!any(profile %in% .sp.pro)) { - stop("RSP_PLS> unknown profile(s), please check", call. = FALSE) - } - - ######################### - #build x_[profile] - ######################### - m_profile <- paste("m_", profile, sep = "") - dat <- dat[c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE", - profile, m_profile, "pred", ".value")] - for (i in profile) { - dat[, paste("x_", i, sep = "")] <- dat[, paste("m_", - i, sep = "")] * dat[, i] - } - .rep <- data.table::as.data.table(dat) - .cols <- c(".value", "pred", paste("x_", profile, sep = "")) - .rep <- .rep[, lapply(.SD, function(x) sum(x, na.rm = TRUE)), - .SDcols = .cols, by = c("SPECIES_ID", "SPECIES_NAME")] - .rep <- as.data.frame(.rep) - - ######################### - # y2 setup - ######################### - # by default this is .value - # but might want mod prediction - if ("y2" %in% names(.x.args) && .x.args$y2 == "pred") { - for (i in profile) { - .rep[, paste("pc_", i, sep = "")] <- - (.rep[, paste("x_", i, sep = "")]/.rep$pred) * 100 - } - } - else { - for (i in profile) { - .rep[, paste("pc_", i, sep = "")] <- - (.rep[, paste("x_", i, sep = "")]/.rep$.value) * 100 - } - } - #might not need all of following now we - #we are not pulling apart to plot one at time... 
- dat <- dat[!duplicated(dat$SPECIES_NAME), ] - dat$PROFILE_NAME <- dat$PROFILE_NAME[1] - dat$PROFILE_CODE <- dat$PROFILE_CODE[1] - dat <- merge(.rep, dat[c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE", - profile)], ) - dat <- dat[order(ordered(dat$SPECIES_ID, levels = .sp.ord)), ] - - ################################ - # build pc_[profile] - ################################ - rownames(dat) <- 1:nrow(dat) - .ref <- names(dat)[grep("pc_", names(dat))] - .oth <- c("SPECIES_ID", "SPECIES_NAME", "PROFILE_CODE", ".value", "pred") - .temp <- data.table::as.data.table(dat[c(.oth, gsub("pc_", "", .ref))]) - .d1 <- data.table::melt(.temp, measure.vars = gsub("pc_", "", .ref), - variable.name = "pls_profile", value.name = "loading") - .temp <- data.table::as.data.table(dat[c(.oth, .ref)]) - .d2 <- data.table::melt(.temp, measure.vars = .ref, - variable.name = "pls_profile", value.name = "percent_contr") - .d2$pls_profile <- gsub("pc_", "", .d2$pls_profile) - dat <- as.data.frame(merge(.d1, .d2, all=T)) - ############################# - - ############################ - #now using lattice to handle logs - ############### - #.dat <- dat - #don't need local version of dat because not changing data ahead of plot - #if(log){ - # .dat$loading <- log10(.dat$loading) - # .ylim <- lapply(profile, function(x){ - # .temp <- subset(.dat, pls_profile==x) - # .temp <- range(.temp$loading, na.rm=TRUE, finite=TRUE) - # if(.temp[1] == .temp[2]){ - # .temp <- c(.temp[1]-1, .temp[1]+1) - # } - # range(c(floor(.temp), ceiling(.temp))) - # }) - #} else { - # .ylim <- lapply(profile, function(x){ - # .temp <- subset(.dat, pls_profile==x) - # .temp <- range(.temp$loading, na.rm=TRUE, finite=TRUE) - # range(pretty(.temp)) - # }) - #} - - - ###################### - #plot - ###################### - #now using lattice/latticeExtra - ## - #think there is more here that can be generalized... 
- p1.ls <- list(x = loading~SPECIES_NAME | pls_profile, - data=dat, ylab="Source Loading", - panel = function(...){ - rsp_panelPal("grid", list(h=-1,v=-1, col="grey", lty=3), - panel.grid, ...) - panel.barchart(...) - }, - between=list(y=.2), - scales=list(x=list(rot=90), - y=list(rot=c(0,90), - relation="free")), - layout=c(1,length(profile))) - if(log){ - p1.ls$scales$y$log=10 - p1.ls$yscale.components <- rsp_yscale.component.log10 - } - p1.ls <- modifyList(p1.ls, .x.args) - if(!"col" %in% names(p1.ls)){ - p1.ls$col <- trellis.par.get("superpose.line")$col[1] - } - p1 <- do.call(barchart, p1.ls) - if("mod" %in% names(.x.args) && !.x.args$mod){ - #if mod FALSE just plot 1 - plot(p1) - } else { - #add mod layer (total contributions) as y2 - .col2 <- if("mod.col" %in% names(.x.args)){ - .x.args$mod.col - } else { - trellis.par.get("superpose.line")$col[2] - } - p2.ls <- list(x = percent_contr ~ factor(SPECIES_NAME) | pls_profile, - pch=16, type=c("h", "p"), col= c(.col2, .col2), - ylab="Total Contribution (%)", - data=dat) - .tmp <- .x.args[grepl("^mod[.]", names(.x.args))] - if(length(.tmp)>0){ - names(.tmp) <- gsub("^mod[.]", "", names(.tmp)) - p2.ls <- modifyList(p2.ls, .tmp) - } - p2 <- do.call(xyplot, p2.ls) - plot(update(doubleYScale(p1, p2, add.ylab2 = TRUE), - par.settings = simpleTheme(col=c(p1.ls$col[1], .col2)))) - } - - ############ - #output - ############ - #could pass plot and data as list??? - return(invisible(dat)) -} - - - - - - - - - - - - - - - - - - -################ -################ -## unexported -################ -################ - -# profile code order -# get profile order in case you need it latter... 
- -rsp_profile_code_order <- function(data){ - .tmp <- data.table::as.data.table(data)[, .(ans=length(unique(PROFILE_CODE))),by="SPECIES_NAME"] - .tmp <- subset(.tmp, ans == max(.tmp$ans, na.rm=TRUE))$SPECIES_NAME - .tmp <- subset(data, SPECIES_NAME %in% .tmp) - sort(unique(.tmp$PROFILE_CODE)) -} - - -#log axis hander -#based on lattice text book method - -#issues?? -# could be problem with y padding when log=T and .value range is wide... - -rsp_yscale.component.log10 <- function(lim, ...) { - ans <- yscale.components.default(lim = lim, ...) - tick.at <- pretty(lim) - tick.at <- tick.at[tick.at == floor(tick.at)] - tick.at <- tick.at[tick.at < max(lim, na.rm=TRUE) & tick.at > min(lim, na.rm=TRUE)] - ans$left$ticks$at <- tick.at - ans$left$labels$at <- tick.at - ans$left$labels$labels <- c(format(10^(tick.at), - drop0trailing = TRUE, - scientific = FALSE)) - #print(ans$left$labels$labels) - ####################### - #need to sort of right labeling - # dropped for now... - #ans$right <- ans$left - ans -} - - -#lattice panel pal -#based on panel handler in loa - -rsp_panelPal <- function(.name, .ls, .panel, ...){ - .x.args <- list(...) - if(!.name %in% names(.x.args) || !is.logical(.x.args[[.name]]) || - .x.args[[.name]]){ - .name2 <- paste("^", .name, "[.]", sep="") - if(.name %in% names(.x.args) && is.list(.x.args[[.name]])){ - .tmp <- .x.args[[.name]] - if(length(.tmp)>0){ - names(.tmp) <- paste(.name, names(.tmp), sep=".") - .x.args <- modifyList(.tmp, .x.args) - } - } - .x.args <- .x.args[grepl(.name2, names(.x.args))] - if(length(.x.args)>0){ - names(.x.args) <- gsub(.name2, "", names(.x.args)) - .ls <- modifyList(.ls, .x.args) - } - do.call(.panel, .ls) - } -} - - - -# could move this into the function... 
- -rsp_panel.pie <- - function (x, y=NULL, groups=NULL, subscripts, totals=NULL, - labels = names(x), edges = 200, radius = 0.8, clockwise = FALSE, - init.angle = if (clockwise) 90 else 0, density = NULL, angle = 45, - col = NULL, border = 1, lty = NULL, main = NULL, ...) - { - - #this is graphics::pie with a couple of modifications... - #many thanks to... - #R Core Team (2023). _R: A Language and Environment for Statistical Computing_. R Foundation - #for Statistical Computing, Vienna, Austria. . - - #if (!is.numeric(x) || any(is.na(x) | x < 0)) - # stop("'x' values must be positive.") - - ######################### - #measurement totals - .y <- totals[subscripts] - ref <- sapply(unique(groups), function(g){ - sum(.y[groups==g], na.rm=TRUE) - }) - .total <- mean(ref, na.rm=TRUE) - - ########################## - #profile contributions to model - # as percentage of measurements - ans <- sapply(unique(groups), function(g){ - sum(y[groups==g], na.rm=TRUE) - }) - ans <- (ans / .total) * 100 - - ##################### - #cheat because following comes from - #pie function in base r... - x <- ans - - if (is.null(labels)) - labels <- as.character(unique(groups)) - else labels <- as.graphicsAnnot(labels) - labels = paste(labels, " (", - round(ans, digits=1), "%)", sep = "") - - if (any(x == 0)) { - labels <- labels[x != 0] - col <- col[x != 0] - x <- x[x != 0] - } - my.tot <- sum(x, na.rm=TRUE) - ######################## - #this adds extra void area - # if does not account for - # 99 percent of the - # measurements - if (my.tot < 99) { - x <- c(x, 100 - my.tot) - labels <- c(labels, "[hide]") - col <- c(col, NA) - init.angle <- init.angle + (((100 - my.tot)/200) * 360) - } - x <- c(0, cumsum(x)/sum(x)) - dx <- diff(x) - nx <- length(dx) - - ###################### - #???? 
- pin <- par("pin") - xlim <- ylim <- c(-1, 1) - if (pin[1L] > pin[2L]) - xlim <- (pin[1L]/pin[2L]) * xlim - else ylim <- (pin[2L]/pin[1L]) * ylim - - ######################## - #col setting - # this needs generalising like - # other pls_plot - if (is.null(col)) - col <- if (is.null(density)) - c("white", "lightblue", "mistyrose", "lightcyan", - "lavender", "cornsilk") - else par("fg") - - ######################## - #border setting - # needs generalising... - if (!is.null(border)) - border <- rep_len(border, nx) - - ############## - #lty - # needs generalising... - if (!is.null(lty)) - lty <- rep_len(lty, nx) - - ############## - #angle of segment - angle <- rep(angle, nx) - if (!is.null(density)) - density <- rep_len(density, nx) - twopi <- if (clockwise) - -2 * pi - else 2 * pi - t2xy <- function(t) { - t2p <- twopi * t + init.angle * pi/180 - list(x = radius * cos(t2p), y = radius * sin(t2p)) - } - ########################### - #like to nudge these if percent before and - # this one are both small - # (making labels close) - - for (i in 1L:nx) { - if (!as.character(labels[i]) == "[hide]") { - n <- max(2, floor(edges * dx[i])) - P <- t2xy(seq.int(x[i], x[i + 1], length.out = n)) - lattice::lpolygon(c(P$x, rev(P$x * 0.5)), c(P$y, rev(P$y * - 0.5)), density = density[i], angle = angle[i], - border = border[1], col = col[i], lty = lty[i]) - P <- t2xy(mean(x[i + 0:1])) - lab <- as.character(labels[i]) - if (!is.na(lab) && nzchar(lab)) { - lattice::llines(c(1, 1.2) * P$x, c(1, 1.2) * P$y) - lattice::ltext(1.3 * P$x, 1.3 * P$y, labels[i], xpd = TRUE, - cex=0.7, adj = ifelse(P$x < 0, 1, 0), ...) 
- } - } - } - lattice::ltext(0, 0, label = paste("sum\n", signif(my.tot, 3), "%", - sep = ""), cex=0.7) - } - - - - - -#think about -####################################### -# printing amount missing as a segment -# adding plot arg control like in plot.respeciate -# adding args to change the displacement of labels - -rsp_profile_pie <- function (x, labels = names(x), edges = 200, radius = 0.8, - clockwise = FALSE, - init.angle = if (clockwise) 90 else 0, - density = NULL, angle = 45, col = NULL, - border = NULL, lty = NULL, main = NULL, ...) -{ - #this is graphics::pie with a couple of modifications... - #many thanks to... - #R Core Team (2023). _R: A Language and Environment for Statistical Computing_. R Foundation - #for Statistical Computing, Vienna, Austria. . - - #print(labels) - #print(col) - - if (!is.numeric(x) || any(is.na(x) | x < 0)) - stop("'x' values must be positive.") - if (is.null(labels)) - labels <- as.character(seq_along(x)) - else labels <- as.graphicsAnnot(labels) - - #added to remove any source with a zero contribution - #but hold labels and col alignment - if(any(x==0)){ - labels <- labels[x!=0] - col <- col[x!=0] - x <- x[x!=0] - } - my.tot <- sum(x) - if(my.tot < 99){ - x <- c(x, 100-my.tot) - labels <- c(labels, "[hide]") - col <- c(col, NA) - init.angle <- init.angle + (((100-my.tot)/200)*360) - } - - x <- c(0, cumsum(x)/sum(x)) - dx <- diff(x) - nx <- length(dx) - plot.new() - pin <- par("pin") - xlim <- ylim <- c(-1, 1) - if (pin[1L] > pin[2L]) - xlim <- (pin[1L]/pin[2L]) * xlim - else ylim <- (pin[2L]/pin[1L]) * ylim - dev.hold() - on.exit(dev.flush()) - plot.window(xlim, ylim, "", asp = 1) - if (is.null(col)) - col <- if (is.null(density)) - c("white", "lightblue", "mistyrose", "lightcyan", - "lavender", "cornsilk") - else par("fg") -# if (!is.null(col)) -# col <- rep_len(col, nx) - if (!is.null(border)) - border <- rep_len(border, nx) - if (!is.null(lty)) - lty <- rep_len(lty, nx) - angle <- rep(angle, nx) - if (!is.null(density)) - 
density <- rep_len(density, nx) - twopi <- if (clockwise) - -2 * pi - else 2 * pi - t2xy <- function(t) { - t2p <- twopi * t + init.angle * pi/180 - list(x = radius * cos(t2p), y = radius * sin(t2p)) - } - - for (i in 1L:nx) { - - if(!as.character(labels[i]) == "[hide]"){ - n <- max(2, floor(edges * dx[i])) - P <- t2xy(seq.int(x[i], x[i + 1], length.out = n)) - #changed shape to include hole - polygon(c(P$x, rev(P$x*0.5)), c(P$y, rev(P$y*0.5)), - density = density[i], angle = angle[i], - border = border[i], col = col[i], lty = lty[i]) - P <- t2xy(mean(x[i + 0:1])) - lab <- as.character(labels[i]) - if (!is.na(lab) && nzchar(lab)) { - # 1.2 and 1.3 are the extenders when moving labels way from - # the pie plot itself - lines(c(1, 1.2) * P$x, c(1, 1.2) * P$y) - text(1.3 * P$x, 1.3 * P$y, labels[i], xpd = TRUE, - adj = ifelse(P$x < 0, 1, 0), ...) - } - } - } - - text(0,0, label=paste("sum\n",signif(my.tot, 3), "%", sep="")) - title(main = main, ...) - invisible(NULL) -} - - - -########################### -########################### -## pls_refit_species -########################### -########################### - - -# superseded by pls_fit_species -# now not exported - -# need to update the model handling so it is like sp_pls_profile -# this would sort power issue above -# also means the user can change setting themselves -# THINK ABOUT THIS -# they could make a pls that was not positively constrained - - -rsp_pls_refit_species <- function(pls, name, power=1, - ...){ - .xx <- pls_report(pls) - #name might want to be case-non-sensitive at some point - #think about how to do this one... 
- .data <- .xx[.xx$SPECIES_NAME==name,] - #get and hold all the m_ values - #update profile contributions for named species - .ms <- names(.data)[grepl("^m_", names(.xx))] - .xs <- gsub("^m_", "", .ms) - .for <- paste("(`", .ms, "`*`", .xs, "`)", - sep="", collapse = "+") - .for <- as.formula(paste("test~", .for)) - .da <- .data[!names(.data) %in% .xs] - - - .ls <- lapply(.xs, function(x){0}) - names(.ls) <- .xs - - ################# - #user might want to set this??? - - .ls2 <- lapply(.xs, function(x){.data[1, x]}) - names(.ls2) <- .xs - - mod <- nls(.for, data=.da, - #weights = 1/(.out$test^push), # think about weighting - start=.ls2, lower=.ls, - algorithm="port", - control=nls.control(tol=1e-5) #think about tolerance - ) - - .data[.xs] <- data.frame(t(coefficients(mod))) - - #lazy - .ans <- .data - - for(i in .ans$PROFILE_CODE){ - .ii <- subset(.ans, PROFILE_CODE==i) - .ii <- .ii[names(.ii) %in% names(pls[[i]]$args$data)] - .sp.ord <- unique(pls[[i]]$args$data$SPECIES_ID) - pls[[i]]$args$data <- subset(pls[[i]]$args$data, SPECIES_NAME!=name) - pls[[i]]$args$data <- rbind(pls[[i]]$args$data, .ii) - #put back in right order - pls[[i]]$args$data <- - pls[[i]]$args$data[order(ordered(pls[[i]]$args$data$SPECIES_ID, - levels=.sp.ord)),] - #rebuild model - .for <- as.character(formula(pls[[i]]$mod)) - .for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) - .ms <- names(pls[[i]]$args$data) - .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")] - .ls <- lapply(.ms, function(x){0}) - names(.ls) <- paste("m_", .ms, sep="") - .da <- pls[[i]]$args$data - - pls[[i]]$mod <- nls(.for, data=.da, - weights = (1/.da$test)^power, # think about weighting - start=.ls, lower=.ls, - algorithm="port", - control=nls.control(tol=1e-5, - warnOnly = TRUE) #think about tolerance - ) - } - - invisible(pls) - -} - - - -#################################### -#################################### -## pls_fit_parent -#################################### 
-#################################### - -# superseded by pls_fit_species -# now now exported - -# (like pls_refit_species) -# like to drop power from formals -# maybe ignore or pass overwrites via ...? - -# need to update the model handling so it is like sp_pls_profile -# this would sort power issue above -# also means the user can change setting themselves -# THINK ABOUT THIS -# they could make a pls that was not positively constrained -# this would also remove the start, lower and upper options -# from the formals... - -# parent could already be in x -# then parent could just be the name of parent??? - -# also a case for using this to add a non-parent to x -# e.g. pls_fit_unknown_species... -# to fit a species to the existing model as a source apportion of -# that unknown... -# in which case maybe this should just be a wrapper for that -# with the start, lower and upper like below - -# if we are setting start and lower -# start = lower if start is missing might be safer... - - -rsp_pls_fit_parent <- function(pls, parent, power=1, - start=100, - lower=50, upper=200, ...){ - - .out <- pls_report(pls) - #parent should only have one species - #and have same profiles as pls model data - #and its contribution to all sources is set by .value - - .out <- subset(.out, SPECIES_ID == unique(.out$SPECIES_ID)[1]) - .test <- c("PROFILE_CODE", ".value", "WEIGHT_PERCENT") - .test <- names(parent)[names(parent) %in% .test] - .data <- parent[.test] - names(.data)[2] <- "parent" - .data <- merge(.out, .data[c(1:2)]) - - #formula - .ms <- names(.data)[grepl("^m_", names(.out))] - .for <- paste("(`", .ms, "`*`", gsub("^m_", "n_", .ms), "`)", - sep="", collapse = "+") - .for <- as.formula(paste("parent~", .for)) - - .ns <- .ms - names(.ns) <- gsub("^m_", "n_", .ms) - .ls <- lapply(.ns, function(x){start}) - .ls2 <- lapply(.ns, function(x){lower}) - .ls3 <- lapply(.ns, function(x){upper}) - - mod <- nls(.for, data=.data, - #weights = (1/.out$test)^power, # think about weighting - 
start=.ls, - lower=.ls2, - upper=.ls3, - algorithm="port", - control=nls.control(tol=1e-5) #think about tolerance - ) - .ans <- data.frame( - PROFILE_CODE = .data$PROFILE_CODE, - SPECIES_ID = parent$SPECIES_ID[1], - SPECIES_NAME = parent$SPECIES_NAME[1], - t(coefficients(mod)), - test = .data$parent - ) - names(.ans) <- gsub("^n_", "", names(.ans)) - for(i in .ans$PROFILE_CODE){ - .ii <- subset(.ans, PROFILE_CODE==i) - .ii <- .ii[names(.ii) != "PROFILE_CODE"] - pls[[i]]$args$data <- - rbind(pls[[i]]$args$data, .ii) - #rebuild model - .for <- as.character(formula(pls[[i]]$mod)) - .for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) - .ms <- names(pls[[i]]$args$data) - .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")] - .ls <- lapply(.ms, function(x){0}) - names(.ls) <- paste("m_", .ms, sep="") - .da <- pls[[i]]$args$data - - pls[[i]]$mod <- nls(.for, data=.da, - weights = (1/.da$test)^power, # think about weighting - start=.ls, lower=.ls, - algorithm="port", - control=nls.control(tol=1e-5) #think about tolerance - ) - } - - pls - -} - - - -#previous version of rebuild... - - -rsp_pls_rebuild.old <- function(pls, species, power=1, - refit.profile=TRUE, ...){ - - x.args <- list(...) - #hiding model args - - .out <- pls_report(pls) - #species / parent should only have one species - # note: parent is name from previous function - # maybe change now??? - #and have same profiles as pls model data - #and its contribution to all sources is set by .value - - #note - ################################ - #following just done quickly to replace - # two previous functions pls_fit_parent and pls_refit_species - - if(is.character(species)){ - #assuming this is SPECIES_NAME of the species to be fit - #and species was in modelled data when pls was built... - if(!species[1] %in% .out$SPECIES_NAME){ - stop("RSP_PLS> 'species' not in PLS, please check", - call. 
= FALSE) - } - parent <- subset(.out, SPECIES_NAME == species[1]) - .out <- subset(.out, SPECIES_NAME != species[1]) - - } else { - #assuming this is respeciate object data.frame of right structure - parent <- species - } - #get a 'safe' profile - #not sure this will work if any sources are not fit to first species - .test <- .out[.out$pred>0,] - .out <- subset(.out, SPECIES_ID == unique(.test$SPECIES_ID)[1]) - .test <- c("PROFILE_CODE", ".value", "WEIGHT_PERCENT") - .test <- names(parent)[names(parent) %in% .test] - .data <- parent[.test] - names(.data)[2] <- "parent" - .data <- merge(.out, .data[c(1:2)]) - - ###################### - #for trace - #add parent to this as m_dummy? - #that should fit as n_dummy = 1... - - ########### - #cheat - ############# - - #print(.data) - - #formula - #changed .out to .data in next line - .ms <- names(.data)[grepl("^m_", names(.data))] - .for <- paste("(`", .ms, "`*`", gsub("^m_", "n_", .ms), "`)", - sep="", collapse = "+") - .for <- as.formula(paste("parent~", .for)) - - .ns <- .ms - names(.ns) <- gsub("^m_", "n_", .ms) - - #note - ################## - #model handling temp update - #lower, start and upper - lower <- if("lower" %in% names(x.args)){ - x.args$lower - } else { - 0 - } - start <- if("start" %in% names(x.args)){ - x.args$start - } else { - lower - } - upper <- if("upper" %in% names(x.args)){ - x.args$upper - } else { - Inf - } - - .ls <- lapply(.ns, function(x){start}) - .ls2 <- lapply(.ns, function(x){lower}) - .ls3 <- lapply(.ns, function(x){upper}) - - #print(.data) - #print(.for) - - mod <- nls(.for, data=.data, - #weights = (1/.out$test)^power, - #no weighting currently because species are all the same here! 
- start=.ls, - lower=.ls2, - upper=.ls3, - algorithm="port", - control=nls.control(tol=1e-5) #think about tolerance - ) - .ans <- data.frame( - PROFILE_CODE = .data$PROFILE_CODE, - SPECIES_ID = parent$SPECIES_ID[1], - SPECIES_NAME = parent$SPECIES_NAME[1], - t(coefficients(mod)), - test = .data$parent - ) - names(.ans) <- gsub("^n_", "", names(.ans)) - - #print(.ans) - - for(i in .ans$PROFILE_CODE){ - .ii <- subset(.ans, PROFILE_CODE==i) - .ii <- .ii[names(.ii) != "PROFILE_CODE"] - .nn <- pls[[i]]$args$data - .nn <- subset(.nn, !SPECIES_NAME %in% unique(.ii$SPECIES_NAME)) - ########### - #cheat - ############# - #print(i) - #print(names(.nn)) - #print(names(.ii)) - ######################## - - pls[[i]]$args$data <- rbind(.nn, .ii) - #rebuild model - .for <- as.character(formula(pls[[i]]$mod)) - .for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) - .ms <- names(pls[[i]]$args$data) - .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")] - .ls <- lapply(.ms, function(x){0}) - names(.ls) <- paste("m_", .ms, sep="") - .da <- pls[[i]]$args$data - - - #print(.for) - #note - ############################# - # option to not do this refit? - if(refit.profile){ - pls[[i]]$mod <- nls(.for, data=.da, - weights = (1/.da$test)^power, # think about weighting - start=.ls, lower=.ls, - algorithm="port", - control=control #think about tolerance - ) - } - } - - pls - -} - - - diff --git a/R/speciate.R b/R/speciate.R deleted file mode 100644 index 3b4a764..0000000 --- a/R/speciate.R +++ /dev/null @@ -1,67 +0,0 @@ -#' Access to the SPECIATE 5.1 US/EPA Tool -#' -#' @description \code{\link{spec}} Return a speciate data.frame -#' -#' @param code Character, PROFILE CODE required by EPA/Speciate -#' @return a data.frame with full information for the desired code (PROFILE_CODE) -#' @export -#' @references -#' Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., -#' Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. 
-#' The development and uses of EPA SPECIATE database. -#' Atmospheric Pollution Research, 1(4), pp.196-206. -#' @examples \dontrun{ -#' code <- "8855" -#' x <- spec(code) -#' } -spec <- function(code) { - - PROFILES <- sysdata$PROFILES - SPECIES <- sysdata$SPECIES - SPECIES_PROPERTIES <- sysdata$SPECIES_PROPERTIES - PROFILE_REFERENCE <- sysdata$PROFILE_REFERENCE - REFERENCES <- sysdata$REFERENCES - - df <- PROFILES[PROFILES$PROFILE_CODE == code, ] - df <- merge(df, - SPECIES, - by = "PROFILE_CODE") - cat("Sum WEIGHT_PERCENT: ", - sum(as.numeric(as.character(df$WEIGHT_PERCENT)), na.rm = T), - "\n") - - df <- merge(df, SPECIES_PROPERTIES, by = "SPECIES_ID") - - df <- merge(df, PROFILE_REFERENCE, by = "PROFILE_CODE") - - df <- merge(df, REFERENCES, by = "REF_Code") - -return(df) - -} - - -#' Find PROFILE_CODE -#' -#' @description \code{\link{find_code}} Return a data.frame with profile codes -#' -#' @param profile Character, to search PROFILE CODE -#' @param by Character, to search code. eg: "Keywords", "PROFILE_NOTES", "PROFILE_TYPE" -#' or other name of PROFILES -#' @return a data.frame with with profile codes -#' @export -#' @references -#' Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., -#' Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. -#' The development and uses of EPA SPECIATE database. -#' Atmospheric Pollution Research, 1(4), pp.196-206. 
-#' @examples \dontrun{ -#' profile <- "Ethanol" -#' dt <- find_code(profile) -#' } -find_code <- function(profile, by = "Keywords") { - - PROFILES <- sysdata$PROFILES - - return(PROFILES[grep(profile, PROFILES[[by]]), ]) -} diff --git a/R/spq.R b/R/spq.R deleted file mode 100644 index 7be81c1..0000000 --- a/R/spq.R +++ /dev/null @@ -1,89 +0,0 @@ -#' @name spq -#' @title spq_ quick access to common re(SPECIATE) sub-samples -#' @aliases spq_gas spq_other spq_pm spq_pm.ae6 spq_pm.ae8 spq_pm.cr1 -#' spq_pm.simplified - -#' @description \code{spq_} functions are quick access wrappers to commonly -#' requested re(SPECIATE) sub-samples. -#' @return \code{spq_} functions typically return a \code{respeciate} -#' \code{data.frame} of the requested profiles. -#' -#' For example: -#' -#' \code{sqr_gas} returns all gaseous profiles (\code{PROFILE_TYPE == 'GAS'}). -#' -#' \code{sqr_pm} returns all particulate matter (PM) profiles not classified -#' as a special PM type (\code{PROFILE_TYPE == 'PM'}). -#' -#' The special PM types are subsets profiles intended for special -#' applications, and these include \code{sqr_pm.ae6} (type \code{PM-AE6}), -#' \code{sqr_pm.ae8} (type \code{PM-AE8}), \code{sqr_pm.cr1} (type -#' \code{PM-CR1}), \code{sqr_pm.simplified} (type \code{PM-Simplified}) -#' and \code{sqr_other} (\code{PROFILE_TYPE == 'OTHER'}). 
-#' - - -############################# -#NOTE -############################ - -#might not be keeping these - -#profile types -#GAS, OTHER, PM, PM-AE6 PM-AE8 PM-CR1 PM-Simplified - -#spq_gas, - - -#' @rdname spq -#' @export - -spq_gas <- function(){ - sp_profile(sp_profile_info("gas", by = "profile_type", partial=FALSE)) -} - -#' @rdname spq -#' @export - -spq_other <- function(){ - sp_profile(sp_profile_info("other", by = "profile_type", partial=FALSE)) -} - -#' @rdname spq -#' @export - -spq_pm <- function(){ - sp_profile(sp_profile_info("pm", by = "profile_type", partial=FALSE)) -} - -#' @rdname spq -#' @export - -spq_pm.ae6 <- function(){ - sp_profile(sp_profile_info("pm-ae6", by = "profile_type", partial=FALSE)) -} - -#' @rdname spq -#' @export - -spq_pm.ae8 <- function(){ - sp_profile(sp_profile_info("pm-ae8", by = "profile_type", partial=FALSE)) -} - -#' @rdname spq -#' @export - -spq_pm.cr1 <- function(){ - sp_profile(sp_profile_info("pm-cr1", by = "profile_type", partial=FALSE)) -} - -#' @rdname spq -#' @export - -spq_pm.simplified <- function(){ - sp_profile(sp_profile_info("pm-simplified", by = "profile_type", partial=FALSE)) -} - - - - diff --git a/R/xxx.R b/R/xxx.R index 66295d7..e0bccfb 100644 --- a/R/xxx.R +++ b/R/xxx.R @@ -1,21 +1,12 @@ -############################## -#setup code, misc code, -#testing code, etc -############################## - -#currently no hooks, etc... - - ##################### # to think about ##################### # standardise error messages, e.g. RSP> [function]: [issue] \n\t [fix]? -# make respeciate object argument rsp rather than x -# that helps sp_plot..() but maybe not plot() - - +# made main respeciate object argument name rsp rather than x +# that helps rsp_plot..() if it passed args to lattice +# but not sure it really help with plot() if respeciate not loaded... ##################### #to check @@ -25,26 +16,30 @@ # xxx_test and its depends... 
# (not keeping unless we can get it to work better) +############################## +#setup code, misc code, +#testing code, etc +############################## + +#currently no hooks, etc... +#globals utils::globalVariables(c("sysdata", ".SD", "ans", "control", "PROFILE_CODE", "PROFILE_NAME", "PROFILE_TYPE", "SPECIES_ID", "SPECIES_NAME", "SPEC_MW", "WEIGHT_PERCENT", ".", ".value")) -######################## #to think about... -####################### # all @import here -# in case we have to move to data.table::as.data.table, etc... -# moving to data.table::as.data.table... +# moved to data.table::as.data.table in code... # #' @import data.table # data.table used by: # rsp_test_profile, -# sp_dcast_profile, and those that use dcast? -# sp_species_cor -# sp_profile_distance +# rsp_dcast..., rsp_melt... +# rsp_cor_species +# rsp_distance_profile # and others??? # need to identify them @@ -54,49 +49,138 @@ utils::globalVariables(c("sysdata", ".SD", "ans", "control", #' @importFrom latticeExtra doubleYScale panel.ablineq #' @importFrom data.table ":=" #' @importFrom stats sd cophenetic cor cutree dist hclust heatmap AIC -#' as.formula coefficients formula lm nls nls.control predict update +#' as.formula coefficients formula lm nls nls.control predict update na.omit #' @importFrom utils modifyList head packageVersion #' @importFrom graphics axis barplot par legend lines rect text abline #' grid mtext plot.new plot.window points polygon title #' @importFrom grDevices cm.colors colorRampPalette as.graphicsAnnot #' dev.flush dev.hold heat.colors rainbow +# notes #might be able to drop legend? 
# check plot.respeciate - +################################ ############################## -#common unexported +## common unexported ############################## +################################ + +# suggesting standardizing naming .rsp_[function_description] + + +#.rsp_ +################################# +# tidy for rsp_x data setup + +#basic build needs +# profile_name and profile_code +# species_name and species_id +# weight_percent (and possibly .value) + +#notes +# think this can go because we now have rsp_build_x??? +# plus I don't think anyone but me (kr) has used it... + +.rsp_ <- function(x){ + .o <- rsp_profile(x) + .o$PROFILE_NAME <- paste("test", .o$PROFILE_NAME, sep=">") + .o$PROFILE_CODE <- "test" + .o +} + + + -#rsp_plot_fix + +#.rsp_split_profile +####################################### +#split respeciate by profile + +#currently not exported +#quick code assumed CODE is unique to profile + +#need to test this + +#not sure we are using this any more ??? +# i think rsp_test, then rsp_test.2 replaced +# and code in plot.respeciate.old ??? + +.rsp_split_profile <- function(x){ + ref <- unique(x$PROFILE_CODE) + lapply(ref, function(y) x[x$PROFILE_CODE==y,]) +} + + + + + + +#.rsp_build_respeciate.... +################################# +# class builds + +# dropped +# rsp_build_respeciate.spcs +# rsp_build_respeciate.ref + +# hoping to drop last one... 
+# as.respeciate to supersede + +#rsp_build_respeciate.spcs <- +# function(x, ...){ +#build +#add .value +# x <- rsp_tidy_profile(x) +# class(x) <- c("respeciate.spcs", "data.frame") +# x +# } + +#rsp_build_respeciate.ref <- +# function(x, ...){ +#build +# class(x) <- c("respeciate.ref", "data.frame") +# x +# } + +.rsp_build_respeciate <- + function(x, ...){ + x <- as.data.frame(x) + if("WEIGHT_PERCENT" %in% names(x)) { + x$.value <- x$WEIGHT_PERCENT + } + class(x) <- c("respeciate", class(x)) + x + } + + +#.rsp_plot_fix ######################### # general tidy function for data before plotting # merges duplicate species in profiles # makes profile names unique if duplicated # tidies species names for use in labelling +# warns about changes #used by ################### #plot.respeciate -#sp_plot_profile +#rsp_plot_profile -#uses +#uses by #################### -#rsp_tidy_profile -#rsp_test_respeciate -#rsp_test_profile - +#.rsp_tidy_profile +#.rsp_test_respeciate +#.rsp_test_profile - -rsp_plot_fix <- function(x, silent = FALSE, ...){ +.rsp_plot_fix <- function(x, silent = FALSE, ...){ .x.args <- list(...) 
- x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(x) ##test object type - test <- rsp_test_respeciate(x, level=2, silent=TRUE) + test <- .rsp_test_respeciate(x, level=2, silent=TRUE) if(test != "respeciate"){ if(test %in% c("respeciate.profile.ref", "respeciate.species.ref")){ stop("RSP> No plot method for respeciate.reference files.", @@ -108,20 +192,20 @@ rsp_plot_fix <- function(x, silent = FALSE, ...){ #don't stop - respeciate profile } #check for duplicates - x <- rsp_test_profile(x) + x <- .rsp_test_profile(x) if(any(x$.n>1) & !silent){ warning(paste("RSP> found duplicate species in profiles (merged and averaged...)", sep=""), call.=FALSE) } #shorten names for plotting - x$SPECIES_NAME <- rsp_tidy_species_name(x$SPECIES_NAME) + x$SPECIES_NAME <- .rsp_tidy_species_name(x$SPECIES_NAME) #################################### #issue profile names are not always unique #################################### test <- x test$SPECIES_ID <- ".default" - test <- rsp_test_profile(test) + test <- .rsp_test_profile(test) ################### #rep_test #can now replace this with data.table version @@ -167,7 +251,12 @@ rsp_plot_fix <- function(x, silent = FALSE, ...){ ## could also test for .value -rsp_test_respeciate <- function(x, level = 1, +#used by +############################### +#.rsp_plot_fix + + +.rsp_test_respeciate <- function(x, level = 1, silent = FALSE){ test <- class(x) out <- "bad" @@ -209,7 +298,11 @@ rsp_test_respeciate <- function(x, level = 1, ## enabled in plot.respeciate, sp_profile_rescale, sp_profile_dcast ## rsp_test_profile -rsp_tidy_profile <- function(x){ +#used by +############################### +#.rsp_plot_fix + +.rsp_tidy_profile <- function(x){ #.value is local version of weight if(!".value" %in% names(x)){ x$.value <- x$WEIGHT_PERCENT @@ -228,7 +321,6 @@ rsp_tidy_profile <- function(x){ #currently not exported #quick code to tidy species names -#currently used in plot.respeciate #note: not fully tested @@ -237,7 +329,11 @@ rsp_tidy_profile <- 
function(x){ # option foreshorten any names longer than [n] characters??? # similar function to tidy profile names -rsp_tidy_species_name <- function(x){ +#used by +############################### +#plot.respeciate + +.rsp_tidy_species_name <- function(x){ #attempts shorten names by remove other versions #names seem to be in format a (or b || c) @@ -267,10 +363,14 @@ rsp_tidy_species_name <- function(x){ #file:///C:/Users/trakradmin/Downloads/datatable.pdf ##rsp_test_profile(aa) -rsp_test_profile <- function(x){ +#used by +############################### +#.rsp_plot_fix + +.rsp_test_profile <- function(x){ #set up .value if not there - x <- rsp_tidy_profile(x) + x <- .rsp_tidy_profile(x) ####################################### #track and return original class? @@ -372,41 +472,15 @@ rsp_test_profile <- function(x){ # as.data.frame(out) #} +#################################### +#.rsp_col_key +#################################### +#color key for correlation matrices - - - - - - -##################### -#testing -##################### - -#playing - -#function(x, subset,...){ -# ans <- match.call() -# ans <- as.character(ans) -# return(ans) -#} - -#ggplot example -#require(ggplot2) -#ggplot() + geom_col(aes(y=SPECIES_NAME, x=WEIGHT_PERCENT), data=aa) + facet_grid(.~PROFILE_NAME) - - -############################ -#color key -############################ - -######################## -#using this in: -######################## - -#sp_species_cor - +#used by: +################################## +# rsp_cor_species #started with: #https://stackoverflow.com/questions/9314658/colorbar-from-custom-colorramppalette @@ -485,180 +559,807 @@ rsp_test_profile <- function(x){ # (not sure how that fits with package remit) +.rsp_col_key <- function(key, cols, x, y = NULL, + ticks, nticks, + na.col = "grey", na.cex = 0.25, + title = "", axes, bg, border, + type = 2, + ...){ -rsp_col_key <- function(key, cols, x, y = NULL, - ticks, nticks, - na.col = "grey", na.cex = 0.25, - title = "", 
axes, bg, border, - type = 2, - ...){ - - #setup - op <- par(no.readonly = TRUE) + #setup + op <- par(no.readonly = TRUE) - if(missing(x)){ - #currently just doing this option - #like key.pos "top-left", key.style = 1 (horizontal, annotation below) - x <- 0.1 + if(missing(x)){ + #currently just doing this option + #like key.pos "top-left", key.style = 1 (horizontal, annotation below) + x <- 0.1 + } + if(is.null(y)){ + y <- 0.9 + } + .min <- min(key, na.rm=TRUE) + .max <- max(key, na.rm=TRUE) + if(missing(ticks)){ + ticks <- pretty(c(.min, .max), 3) + } + if(missing(nticks)){ + nticks <- length(ticks) + } + .na.width <- na.cex * (.max-.min) + if(missing(bg)){ + bg <- "white" + } + if(missing(border)){ + border <- "black" + } + scale <- (length(cols)-1)/(.max-.min) + + #print(.max-.min) + #print(.na.width) + #print(.min) + #print(.max) + + #key.style 1 + if(type==1){ + #horizontal, header before, annotation after + #margins + .mai <- c(0.1,0.1,0.1,0.1) + if(title ==""){ + #no title + .fig <- c(x-0.1, x+0.1, y-0.04, y+0.1) + .wdt <- 12 + } else { + #title + .fig <- c(x-0.1, x+0.1, y-0.12, y+0.1) + .wdt <- 15 } - if(is.null(y)){ - y <- 0.9 + + if(is.na(na.col)){ + .brd <- c(.na.width, .na.width) + } else { + .brd <- c(.na.width, .na.width*2) } - .min <- min(key, na.rm=TRUE) - .max <- max(key, na.rm=TRUE) - if(missing(ticks)){ - ticks <- pretty(c(.min, .max), 3) + + #position col key + par(fig = .fig, mai = .mai, new=TRUE) + + #plot col key + + #region + plot(c(.min -.brd[2], .max+(.brd[1]*0.5)), c(-1, .wdt), + type='n', bty='n', xaxt='n', xlab='', + yaxt='n', ylab='', main="", font.main = 1) + #bg + border + rect(.min -.brd[2], -1, .max+(.brd[1]*0.5), .wdt, + col=bg, border=border) + #title + if(title !=""){ + text(.min+((.max-.min)/2)+(.na.width*0.75), 13, labels=title, col="black", cex=0.75) } - if(missing(nticks)){ - nticks <- length(ticks) + #col scale + for (i in 1:(length(cols)-0)) { + x <- (i-1)/scale + .min + rect(x,5,x+1/scale,10,col=cols[i], border=NA) } - 
.na.width <- na.cex * (.max-.min) - if(missing(bg)){ - bg <- "white" + #axes + lines(c(.min, .max), c(5,5), col="black") + for (i in ticks) { + lines(c(i,i), c(5,4),col="black") } - if(missing(border)){ - border <- "black" + #axes annotation + text(ticks, rep(2, length(ticks)), labels=ticks, + cex=0.75, adj=0.5) + #na block + if(!is.na(na.col)){ + rect(.min-(.na.width*0.5), 5,.min-(.na.width*1.5), 10, col=na.col, border="black") + text(.min-.na.width, 2, labels="NA", col="black", cex=0.75) } - scale <- (length(cols)-1)/(.max-.min) - -#print(.max-.min) -#print(.na.width) - #print(.min) - #print(.max) - - #key.style 1 - if(type==1){ - #horizontal, header before, annotation after - #margins - .mai <- c(0.1,0.1,0.1,0.1) - if(title ==""){ - #no title - .fig <- c(x-0.1, x+0.1, y-0.04, y+0.1) - .wdt <- 12 - } else { - #title - .fig <- c(x-0.1, x+0.1, y-0.12, y+0.1) - .wdt <- 15 - } - if(is.na(na.col)){ - .brd <- c(.na.width, .na.width) - } else { - .brd <- c(.na.width, .na.width*2) - } + } - #position col key - par(fig = .fig, mai = .mai, new=TRUE) + if(type==2){ + #horizontal, header before, annotation after + #margins + .mai <- c(0.1,0.1,0.1,0.1) + if(title ==""){ + #no title + .fig <- c(x-0.05, x+0.05, y-0.2, y+0.1) + .wdt <- 12 + } else { + #title + .fig <- c(x-0.05, x+0.05, y-0.2, y+0.1) + .wdt <- 15 + } - #plot col key + if(is.na(na.col)){ + .brd <- c(.na.width, .na.width) + } else { + .brd <- c(.na.width, .na.width*2) + } - #region - plot(c(.min -.brd[2], .max+(.brd[1]*0.5)), c(-1, .wdt), - type='n', bty='n', xaxt='n', xlab='', - yaxt='n', ylab='', main="", font.main = 1) - #bg + border - rect(.min -.brd[2], -1, .max+(.brd[1]*0.5), .wdt, - col=bg, border=border) - #title - if(title !=""){ - text(.min+((.max-.min)/2)+(.na.width*0.75), 13, labels=title, col="black", cex=0.75) - } - #col scale - for (i in 1:(length(cols)-0)) { - x <- (i-1)/scale + .min - rect(x,5,x+1/scale,10,col=cols[i], border=NA) - } - #axes - lines(c(.min, .max), c(5,5), col="black") - for (i in 
ticks) { - lines(c(i,i), c(5,4),col="black") - } - #axes annotation - text(ticks, rep(2, length(ticks)), labels=ticks, - cex=0.75, adj=0.5) - #na block - if(!is.na(na.col)){ - rect(.min-(.na.width*0.5), 5,.min-(.na.width*1.5), 10, col=na.col, border="black") - text(.min-.na.width, 2, labels="NA", col="black", cex=0.75) - } + #position col key + par(fig = .fig, mai = .mai, new=TRUE) + + #plot col key + + #region + plot(c(-1, .wdt), c(.min-.brd[2], .max+(.brd[1]*0.5)), + type='n', bty='n', xaxt='n', xlab='', + yaxt='n', ylab='', main="", font.main = 1) + #bg + border + rect(-1, .min-.brd[2], .wdt, .max+(.brd[1]*0.5), + col=bg, border=border) + #title + if(title !=""){ + text(.min+((.max-.min)/2)+(.na.width*0.75), 13, labels=title, + col="black", cex=0.75) + } + #for (i in 1:(length(lut)-1)) { + # y = (i-1)/scale + min + # rect(0,y,10,y+1/scale, col=lut[i], border=NA) + #} + + #col scale + #################### + #note + #################### + #this needs work because rect needs colored border + #which kills transparent ranges... 
+ #does that matter + for (i in 1:(length(cols))) { + y <- (i-1)/scale + .min + rect(5,y-(1/scale),10,y,col=cols[i], border=cols[i]) + } + #axes + lines(c(5,5), c(.min, .max), col="black") + for (i in ticks) { + lines(c(5,4), c(i,i), col="black") + } + #axes annotation + text(rep(2, length(ticks)), ticks, labels=ticks, + cex=0.75, adj=0.5) + #na block + if(!is.na(na.col)){ + rect(5, .min-(.na.width*0.5), 10, .min-(.na.width*1.5), col=na.col, border="black") + text(2, .min-.na.width, labels="NA", col="black", cex=0.75) } - if(type==2){ - #horizontal, header before, annotation after - #margins - .mai <- c(0.1,0.1,0.1,0.1) - if(title ==""){ - #no title - .fig <- c(x-0.05, x+0.05, y-0.2, y+0.1) - .wdt <- 12 - } else { - #title - .fig <- c(x-0.05, x+0.05, y-0.2, y+0.1) - .wdt <- 15 - } + } - if(is.na(na.col)){ - .brd <- c(.na.width, .na.width) - } else { - .brd <- c(.na.width, .na.width*2) - } + par(op) +} - #position col key - par(fig = .fig, mai = .mai, new=TRUE) +#plot(iris$Sepal.Length, iris$Sepal.Width) +#rsp_col_key(c(1,-1), colorRampPalette(c("light green", "yellow", "orange", "red"))(100), title="testing") - #plot col key - #region - plot(c(-1, .wdt), c(.min-.brd[2], .max+(.brd[1]*0.5)), - type='n', bty='n', xaxt='n', xlab='', - yaxt='n', ylab='', main="", font.main = 1) - #bg + border - rect(-1, .min-.brd[2], .wdt, .max+(.brd[1]*0.5), - col=bg, border=border) - #title - if(title !=""){ - text(.min+((.max-.min)/2)+(.na.width*0.75), 13, labels=title, - col="black", cex=0.75) - } - #for (i in 1:(length(lut)-1)) { - # y = (i-1)/scale + min - # rect(0,y,10,y+1/scale, col=lut[i], border=NA) - #} - - #col scale - #################### - #note - #################### - #this needs work because rect needs colored border - #which kills transparent ranges... - #does that matter - for (i in 1:(length(cols))) { - y <- (i-1)/scale + .min - rect(5,y-(1/scale),10,y,col=cols[i], border=cols[i]) + +################ +################ +## unexported +## from pls.plot... 
+################ +################ + +# profile code order +# get profile order in case you need it latter... + +.rsp_profile_code_order <- function(data){ + .tmp <- data.table::as.data.table(data)[, .(ans=length(unique(PROFILE_CODE))),by="SPECIES_NAME"] + .tmp <- subset(.tmp, ans == max(.tmp$ans, na.rm=TRUE))$SPECIES_NAME + .tmp <- subset(data, SPECIES_NAME %in% .tmp) + sort(unique(.tmp$PROFILE_CODE)) +} + + +#log axis hander +#based on lattice text book method + +#issues?? +# could be problem with y padding when log=T and .value range is wide... + +.rsp_yscale.component.log10 <- function(lim, ...) { + ans <- yscale.components.default(lim = lim, ...) + tick.at <- pretty(lim) + tick.at <- tick.at[tick.at == floor(tick.at)] + tick.at <- tick.at[tick.at < max(lim, na.rm=TRUE) & tick.at > min(lim, na.rm=TRUE)] + ans$left$ticks$at <- tick.at + ans$left$labels$at <- tick.at + ans$left$labels$labels <- c(format(10^(tick.at), + drop0trailing = TRUE, + scientific = FALSE)) + #print(ans$left$labels$labels) + ####################### + #need to sort of right labeling + # dropped for now... + #ans$right <- ans$left + ans +} + + +#lattice panel pal +#based on panel handler in loa + +.rsp_panelPal <- function(.name, .ls, .panel, ...){ + .x.args <- list(...) + if(!.name %in% names(.x.args) || !is.logical(.x.args[[.name]]) || + .x.args[[.name]]){ + .name2 <- paste("^", .name, "[.]", sep="") + if(.name %in% names(.x.args) && is.list(.x.args[[.name]])){ + .tmp <- .x.args[[.name]] + if(length(.tmp)>0){ + names(.tmp) <- paste(.name, names(.tmp), sep=".") + .x.args <- modifyList(.tmp, .x.args) } - #axes - lines(c(5,5), c(.min, .max), col="black") - for (i in ticks) { - lines(c(5,4), c(i,i), col="black") + } + .x.args <- .x.args[grepl(.name2, names(.x.args))] + if(length(.x.args)>0){ + names(.x.args) <- gsub(.name2, "", names(.x.args)) + .ls <- modifyList(.ls, .x.args) + } + do.call(.panel, .ls) + } +} + + + +# could move this into the function... 
+ +.rsp_panel.pie <- + function (x, y=NULL, groups=NULL, subscripts, totals=NULL, + labels = names(x), edges = 200, radius = 0.8, clockwise = FALSE, + init.angle = if (clockwise) 90 else 0, density = NULL, angle = 45, + col = NULL, border = 1, lty = NULL, main = NULL, ...) + { + + #this is graphics::pie with a couple of modifications... + #many thanks to... + #R Core Team (2023). _R: A Language and Environment for Statistical Computing_. R Foundation + #for Statistical Computing, Vienna, Austria. . + + #if (!is.numeric(x) || any(is.na(x) | x < 0)) + # stop("'x' values must be positive.") + + ######################### + #measurement totals + .y <- totals[subscripts] + ref <- sapply(unique(groups), function(g){ + sum(.y[groups==g], na.rm=TRUE) + }) + .total <- mean(ref, na.rm=TRUE) + + ########################## + #profile contributions to model + # as percentage of measurements + ans <- sapply(unique(groups), function(g){ + sum(y[groups==g], na.rm=TRUE) + }) + ans <- (ans / .total) * 100 + + ##################### + #cheat because following comes from + #pie function in base r... + x <- ans + + if (is.null(labels)) + labels <- as.character(unique(groups)) + else labels <- as.graphicsAnnot(labels) + labels = paste(labels, " (", + round(ans, digits=1), "%)", sep = "") + + if (any(x == 0)) { + labels <- labels[x != 0] + col <- col[x != 0] + x <- x[x != 0] + } + my.tot <- sum(x, na.rm=TRUE) + ######################## + #this adds extra void area + # if does not account for + # 99 percent of the + # measurements + if (my.tot < 99) { + x <- c(x, 100 - my.tot) + labels <- c(labels, "[hide]") + col <- c(col, NA) + init.angle <- init.angle + (((100 - my.tot)/200) * 360) + } + x <- c(0, cumsum(x)/sum(x)) + dx <- diff(x) + nx <- length(dx) + + ###################### + #???? 
+ pin <- par("pin") + xlim <- ylim <- c(-1, 1) + if (pin[1L] > pin[2L]) + xlim <- (pin[1L]/pin[2L]) * xlim + else ylim <- (pin[2L]/pin[1L]) * ylim + + ######################## + #col setting + # this needs generalising like + # other pls_plot + if (is.null(col)) + col <- if (is.null(density)) + c("white", "lightblue", "mistyrose", "lightcyan", + "lavender", "cornsilk") + else par("fg") + + ######################## + #border setting + # needs generalising... + if (!is.null(border)) + border <- rep_len(border, nx) + + ############## + #lty + # needs generalising... + if (!is.null(lty)) + lty <- rep_len(lty, nx) + + ############## + #angle of segment + angle <- rep(angle, nx) + if (!is.null(density)) + density <- rep_len(density, nx) + twopi <- if (clockwise) + -2 * pi + else 2 * pi + t2xy <- function(t) { + t2p <- twopi * t + init.angle * pi/180 + list(x = radius * cos(t2p), y = radius * sin(t2p)) + } + ########################### + #like to nudge these if percent before and + # this one are both small + # (making labels close) + + for (i in 1L:nx) { + if (!as.character(labels[i]) == "[hide]") { + n <- max(2, floor(edges * dx[i])) + P <- t2xy(seq.int(x[i], x[i + 1], length.out = n)) + lattice::lpolygon(c(P$x, rev(P$x * 0.5)), c(P$y, rev(P$y * + 0.5)), density = density[i], angle = angle[i], + border = border[1], col = col[i], lty = lty[i]) + P <- t2xy(mean(x[i + 0:1])) + lab <- as.character(labels[i]) + if (!is.na(lab) && nzchar(lab)) { + lattice::llines(c(1, 1.2) * P$x, c(1, 1.2) * P$y) + lattice::ltext(1.3 * P$x, 1.3 * P$y, labels[i], xpd = TRUE, + cex=0.7, adj = ifelse(P$x < 0, 1, 0), ...) 
+ } } - #axes annotation - text(rep(2, length(ticks)), ticks, labels=ticks, - cex=0.75, adj=0.5) - #na block - if(!is.na(na.col)){ - rect(5, .min-(.na.width*0.5), 10, .min-(.na.width*1.5), col=na.col, border="black") - text(2, .min-.na.width, labels="NA", col="black", cex=0.75) + } + lattice::ltext(0, 0, label = paste("sum\n", signif(my.tot, 3), "%", + sep = ""), cex=0.7) + } + + + + + +#think about +####################################### +# printing amount missing as a segment +# adding plot arg control like in plot.respeciate +# adding args to change the displacement of labels + +.rsp_profile_pie <- function (x, labels = names(x), edges = 200, radius = 0.8, + clockwise = FALSE, + init.angle = if (clockwise) 90 else 0, + density = NULL, angle = 45, col = NULL, + border = NULL, lty = NULL, main = NULL, ...) +{ + #this is graphics::pie with a couple of modifications... + #many thanks to... + #R Core Team (2023). _R: A Language and Environment for Statistical Computing_. R Foundation + #for Statistical Computing, Vienna, Austria. . 
+ + #print(labels) + #print(col) + + if (!is.numeric(x) || any(is.na(x) | x < 0)) + stop("'x' values must be positive.") + if (is.null(labels)) + labels <- as.character(seq_along(x)) + else labels <- as.graphicsAnnot(labels) + + #added to remove any source with a zero contribution + #but hold labels and col alignment + if(any(x==0)){ + labels <- labels[x!=0] + col <- col[x!=0] + x <- x[x!=0] + } + my.tot <- sum(x) + if(my.tot < 99){ + x <- c(x, 100-my.tot) + labels <- c(labels, "[hide]") + col <- c(col, NA) + init.angle <- init.angle + (((100-my.tot)/200)*360) + } + + x <- c(0, cumsum(x)/sum(x)) + dx <- diff(x) + nx <- length(dx) + plot.new() + pin <- par("pin") + xlim <- ylim <- c(-1, 1) + if (pin[1L] > pin[2L]) + xlim <- (pin[1L]/pin[2L]) * xlim + else ylim <- (pin[2L]/pin[1L]) * ylim + dev.hold() + on.exit(dev.flush()) + plot.window(xlim, ylim, "", asp = 1) + if (is.null(col)) + col <- if (is.null(density)) + c("white", "lightblue", "mistyrose", "lightcyan", + "lavender", "cornsilk") + else par("fg") + # if (!is.null(col)) + # col <- rep_len(col, nx) + if (!is.null(border)) + border <- rep_len(border, nx) + if (!is.null(lty)) + lty <- rep_len(lty, nx) + angle <- rep(angle, nx) + if (!is.null(density)) + density <- rep_len(density, nx) + twopi <- if (clockwise) + -2 * pi + else 2 * pi + t2xy <- function(t) { + t2p <- twopi * t + init.angle * pi/180 + list(x = radius * cos(t2p), y = radius * sin(t2p)) + } + + for (i in 1L:nx) { + + if(!as.character(labels[i]) == "[hide]"){ + n <- max(2, floor(edges * dx[i])) + P <- t2xy(seq.int(x[i], x[i + 1], length.out = n)) + #changed shape to include hole + polygon(c(P$x, rev(P$x*0.5)), c(P$y, rev(P$y*0.5)), + density = density[i], angle = angle[i], + border = border[i], col = col[i], lty = lty[i]) + P <- t2xy(mean(x[i + 0:1])) + lab <- as.character(labels[i]) + if (!is.na(lab) && nzchar(lab)) { + # 1.2 and 1.3 are the extenders when moving labels way from + # the pie plot itself + lines(c(1, 1.2) * P$x, c(1, 1.2) * P$y) + 
text(1.3 * P$x, 1.3 * P$y, labels[i], xpd = TRUE, + adj = ifelse(P$x < 0, 1, 0), ...) } + } + } + + text(0,0, label=paste("sum\n",signif(my.tot, 3), "%", sep="")) + title(main = main, ...) + invisible(NULL) +} + + +########################### +########################### +## pls_refit_species +########################### +########################### + + +# superseded by pls_fit_species +# not not exported + +# need to update the model handling so it is like sp_pls_profile +# this would sort power issue above +# also means the user can change setting themselves +# THINK ABOUT THIS +# they could make a pls that was not positively constrained + + +.rsp_pls_refit_species <- function(pls, name, power=1, + ...){ + .xx <- pls_report(pls) + #name might want to be case-non-sensitive at some point + #think about how to do this one... + .data <- .xx[.xx$SPECIES_NAME==name,] + #get and hold all the m_ values + #update profile contributions for named species + .ms <- names(.data)[grepl("^m_", names(.xx))] + .xs <- gsub("^m_", "", .ms) + .for <- paste("(`", .ms, "`*`", .xs, "`)", + sep="", collapse = "+") + .for <- as.formula(paste("test~", .for)) + .da <- .data[!names(.data) %in% .xs] + + + .ls <- lapply(.xs, function(x){0}) + names(.ls) <- .xs + + ################# + #user might want to set this??? 
+ + .ls2 <- lapply(.xs, function(x){.data[1, x]}) + names(.ls2) <- .xs + + mod <- nls(.for, data=.da, + #weights = 1/(.out$test^push), # think about weighting + start=.ls2, lower=.ls, + algorithm="port", + control=nls.control(tol=1e-5) #think about tolerance + ) + + .data[.xs] <- data.frame(t(coefficients(mod))) + + #lazy + .ans <- .data + + for(i in .ans$PROFILE_CODE){ + .ii <- subset(.ans, PROFILE_CODE==i) + .ii <- .ii[names(.ii) %in% names(pls[[i]]$args$data)] + .sp.ord <- unique(pls[[i]]$args$data$SPECIES_ID) + pls[[i]]$args$data <- subset(pls[[i]]$args$data, SPECIES_NAME!=name) + pls[[i]]$args$data <- rbind(pls[[i]]$args$data, .ii) + #put back in right order + pls[[i]]$args$data <- + pls[[i]]$args$data[order(ordered(pls[[i]]$args$data$SPECIES_ID, + levels=.sp.ord)),] + #rebuild model + .for <- as.character(formula(pls[[i]]$mod)) + .for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) + .ms <- names(pls[[i]]$args$data) + .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")] + .ls <- lapply(.ms, function(x){0}) + names(.ls) <- paste("m_", .ms, sep="") + .da <- pls[[i]]$args$data + + pls[[i]]$mod <- nls(.for, data=.da, + weights = (1/.da$test)^power, # think about weighting + start=.ls, lower=.ls, + algorithm="port", + control=nls.control(tol=1e-5, + warnOnly = TRUE) #think about tolerance + ) + } + + invisible(pls) + +} + + + +#################################### +#################################### +## pls_fit_parent +#################################### +#################################### + +# superseded by pls_fit_species +# not now exported + +# (like pls_refit_species) +# like to drop power from formals +# maybe ignore or pass overwrites via ...? 
+ +# need to update the model handling so it is like sp_pls_profile +# this would sort power issue above +# also means the user can change setting themselves +# THINK ABOUT THIS +# they could make a pls that was not positively constrained +# this would also remove the start, lower and upper options +# from the formals... + +# parent could already be in x +# then parent could just be the name of parent??? + +# also a case for using this to add a non-parent to x +# e.g. pls_fit_unknown_species... +# to fit a species to the existing model as a source apportion of +# that unknown... +# in which case maybe this should just be a wrapper for that +# with the start, lower and upper like below + +# if we are setting start and lower +# start = lower if start is missing might be safer... + + +.rsp_pls_fit_parent <- function(pls, parent, power=1, + start=100, + lower=50, upper=200, ...){ + + .out <- pls_report(pls) + #parent should only have one species + #and have same profiles as pls model data + #and its contribution to all sources is set by .value + + .out <- subset(.out, SPECIES_ID == unique(.out$SPECIES_ID)[1]) + .test <- c("PROFILE_CODE", ".value", "WEIGHT_PERCENT") + .test <- names(parent)[names(parent) %in% .test] + .data <- parent[.test] + names(.data)[2] <- "parent" + .data <- merge(.out, .data[c(1:2)]) + + #formula + .ms <- names(.data)[grepl("^m_", names(.out))] + .for <- paste("(`", .ms, "`*`", gsub("^m_", "n_", .ms), "`)", + sep="", collapse = "+") + .for <- as.formula(paste("parent~", .for)) + + .ns <- .ms + names(.ns) <- gsub("^m_", "n_", .ms) + .ls <- lapply(.ns, function(x){start}) + .ls2 <- lapply(.ns, function(x){lower}) + .ls3 <- lapply(.ns, function(x){upper}) + + mod <- nls(.for, data=.data, + #weights = (1/.out$test)^power, # think about weighting + start=.ls, + lower=.ls2, + upper=.ls3, + algorithm="port", + control=nls.control(tol=1e-5) #think about tolerance + ) + .ans <- data.frame( + PROFILE_CODE = .data$PROFILE_CODE, + SPECIES_ID = 
parent$SPECIES_ID[1], + SPECIES_NAME = parent$SPECIES_NAME[1], + t(coefficients(mod)), + test = .data$parent + ) + names(.ans) <- gsub("^n_", "", names(.ans)) + for(i in .ans$PROFILE_CODE){ + .ii <- subset(.ans, PROFILE_CODE==i) + .ii <- .ii[names(.ii) != "PROFILE_CODE"] + pls[[i]]$args$data <- + rbind(pls[[i]]$args$data, .ii) + #rebuild model + .for <- as.character(formula(pls[[i]]$mod)) + .for <- as.formula(paste(.for[2], .for[1], .for[3], sep="")) + .ms <- names(pls[[i]]$args$data) + .ms <- .ms[!.ms %in% c("SPECIES_ID", "SPECIES_NAME", "test")] + .ls <- lapply(.ms, function(x){0}) + names(.ls) <- paste("m_", .ms, sep="") + .da <- pls[[i]]$args$data + + pls[[i]]$mod <- nls(.for, data=.da, + weights = (1/.da$test)^power, # think about weighting + start=.ls, lower=.ls, + algorithm="port", + control=nls.control(tol=1e-5) #think about tolerance + ) + } + + pls + +} + + + + + +####################################### +######################################## +## .rsp_get_[something]_from_pls +##################################### +####################################### + +#for use with pls outputs + +#note: these current expect pls_report([rsp_pls]) as ONLY input dat + +.rsp_get_m_from_pls <- function(dat){ + + #get m profiles from a pls model + ############################################# + + #currently assumes you are giving it pls_report output... 
+ # + + #get m data + ########################### + .refs <- names(dat)[grepl("^[.]m_", names(dat))] + .tmp <- dat[c("SPECIES_NAME", .refs)] + .tmp <- .tmp[!duplicated(.tmp$SPECIES_NAME),] + + #restructure + ######################### + #renaming columns + .tmp <- data.table::melt.data.table(data.table::as.data.table(.tmp), + id.var="SPECIES_NAME") + .tmp <- as.data.frame(.tmp) + names(.tmp)[names(.tmp)=="variable"] <- "PROFILE_CODE" + .tmp$PROFILE_CODE <- as.character(.tmp$PROFILE_CODE) + .tmp$PROFILE_CODE <- gsub("^.m_", "", .tmp$PROFILE_CODE) + names(.tmp)[names(.tmp)=="value"] <- ".value" + #addition cheats so it is respeciate-like + .tmp$PROFILE_NAME <- .tmp$PROFILE_CODE + .tmp$SPECIES_ID <- .tmp$SPECIES_NAME + .tmp$WEIGHT_PERCENT <- .tmp$.value + ##similay using rsp_build_x + ##makes rsp_x but some codes may not be assigned... + #.p1.prof <- unique(.tmp$PROFILE_CODE) + #.ans <- rsp_build_x(.tmp, test.rsp=FALSE) + #.cheat <- .ans$SPECIES_ID[is.na(.ans$SPECIES_ID)] + #if(length(.cheat)>0){ + # .ans$SPECIES_ID[is.na(.ans$SPECIES_ID)] <- .ans$SPECIES_NAME[is.na(.ans$SPECIES_ID)] + #} + + #output + #want to be rsp_x at some point... + .tmp +} + +.rsp_get_prop_from_pls <- function(dat){ + + #get x/.value table from pls model... + ######################################### + + #currently assumes you are giving it pls_report output... + + #get x data, etc + .tmp <- names(dat) + .tmp <- .tmp[grep("^.x_", .tmp)] + .refs <- c(.tmp, "pred") + .sp.ref <- unique(dat$SPECIES_NAME) + #make summary pls. prop.table + .ans2 <- lapply(.sp.ref, function(x){ + .tmp <- subset(dat, SPECIES_NAME==x) + .d2 <- .tmp[1, c("SPECIES_NAME", .refs)] + for(.ref in .refs){ + #use only paired cases to calculate skew... 
+ .tmp2 <- .tmp[c(.ref, ".value")] + .tmp2[.tmp2==0] <- NA + .tmp2 <- na.omit(.tmp2) + .d2[, .ref] <- sum(.tmp2[,.ref], na.rm=TRUE) / sum(.tmp2[,".value"], na.rm=TRUE) } + .d2 + }) + .ans2 <- do.call(rbind, .ans2) + + #restructure to output + .ans2 <- .ans2[names(.ans2)!="pred"] + .ans2 <- data.table::melt(data.table::as.data.table(.ans2), + id.var="SPECIES_NAME") + .ans2 <- as.data.frame(.ans2) + names(.ans2)[names(.ans2)=="variable"] <- "PROFILE_CODE" + .ans2$PROFILE_CODE <- gsub("^[.]x_", "", as.character(.ans2$PROFILE_CODE)) + names(.ans2)[names(.ans2)=="value"] <- ".prop" + + #output + .ans2 - par(op) } -#plot(iris$Sepal.Length, iris$Sepal.Width) -#rsp_col_key(c(1,-1), colorRampPalette(c("light green", "yellow", "orange", "red"))(100), title="testing") + + + + + + + + + +##################### +#testing +##################### + +#playing + +#function(x, subset,...){ +# ans <- match.call() +# ans <- as.character(ans) +# return(ans) +#} + +#ggplot example +#require(ggplot2) +#ggplot() + geom_col(aes(y=SPECIES_NAME, x=WEIGHT_PERCENT), data=aa) + facet_grid(.~PROFILE_NAME) + + diff --git a/README.Rmd b/README.Rmd index 1773459..0f0e005 100644 --- a/README.Rmd +++ b/README.Rmd @@ -31,7 +31,7 @@ Find profiles based on search criteria ```{r } library(respeciate) -x <- sp_find_profile("Ethanol") +x <- rsp_find_profile("Ethanol") x ``` @@ -39,7 +39,7 @@ x ## speciate ```{r } -p <- sp_profile("8833") +p <- rsp_profile("8833") ``` ## plot @@ -54,7 +54,7 @@ plot(p) ... 
using lattice barchart syntax ```{r fig.width=12, fig.height=5, fig.align="center", dpi=400} -p2 <- sp_profile(c(8833, 8850)) +p2 <- rsp_profile(c(8833, 8850)) plot(p2, key=list(space="top")) ``` diff --git a/README.md b/README.md index ac3a1d1..4a8027a 100644 --- a/README.md +++ b/README.md @@ -19,23 +19,23 @@ Find profiles based on search criteria ``` r library(respeciate) -x <- sp_find_profile("Ethanol") +x <- rsp_find_profile("Ethanol") x -#> respeciate profile reference -#> 0291 1070 1071 1132 1149 1301 1302 1303 1304 1314 5473 5474 5475 5477 5478 5479 -#> 5481 5482 5483 5485 5486 5487 5489 5490 5491 5493 5494 5495 5496 5497 5498 5499 -#> 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 -#> 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 -#> 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 -#> 8200 8210 8215 8733 8736 8751 8751a 8752 8754 8755 8757 8758 8760 8761 8763 -#> 8764 8765 8766 8767 8768 ... 
-#> > 160 profiles [showing first 100] +#> respeciate profile list: 160 +#> [NO SPECIES] +#> (CODE 0291) Surface Coating Solvent - Methyl Alcohol +#> (CODE 1070) Alcohols Production - Methanol - Purge Gas Vent +#> (CODE 1071) Alcohols Production - Methanol - Distillation Vent +#> (CODE 1132) Ethanolamines +#> (CODE 1149) Methanol +#> (CODE 1301) 10% Ethanol Composite (Hot Soak + Diurnal) Evaporative +#> > showing 6 of 160 ``` ## speciate ``` r -p <- sp_profile("8833") +p <- rsp_profile("8833") ``` ## plot @@ -52,7 +52,7 @@ plot(p) … using lattice barchart syntax ``` r -p2 <- sp_profile(c(8833, 8850)) +p2 <- rsp_profile(c(8833, 8850)) plot(p2, key=list(space="top")) ``` diff --git a/docs/404.html b/docs/404.html index 4e9525c..9c4c3c2 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@ respeciate - 0.2.6 + 0.3.0 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 8d2a259..0267d7f 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ respeciate - 0.2.6 + 0.3.0 diff --git a/docs/authors.html b/docs/authors.html index 14e7d53..2133874 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ respeciate - 0.2.6 + 0.3.0 @@ -65,15 +65,15 @@

Citation

-

Ibarra-Espinosa S, Ropkins K (2023). +

Ibarra-Espinosa S, Ropkins K (2024). respeciate: Speciation profiles for gases and aerosols. -R package version 0.2.6, https://github.com/atmoschem/respeciate. +R package version 0.3.0, https://github.com/atmoschem/respeciate.

@Manual{,
   title = {respeciate: Speciation profiles for gases and aerosols},
   author = {Sergio Ibarra-Espinosa and Karl Ropkins},
-  year = {2023},
-  note = {R package version 0.2.6},
+  year = {2024},
+  note = {R package version 0.3.0},
   url = {https://github.com/atmoschem/respeciate},
 }
diff --git a/docs/index.html b/docs/index.html index 73885f3..3fb1df8 100644 --- a/docs/index.html +++ b/docs/index.html @@ -18,9 +18,8 @@ - + respeciate: Speciation profiles for gases and aerosols — respeciate-package • respeciaterespeciate.generics — respeciate.generics • respeciate(re)SPECIATE data averaging functions — rsp.average • respeciate + + +
+
+ + + +
+
+ + +
+

Functions to build composite (re)SPECIATE profiles

+

rsp_average_profile generates an average composite +of a supplied multi-profile respeciate object.

+
+ +
+
rsp_average_profile(rsp, code = NULL, name = NULL, method = 1, ...)
+
+ +
+

Arguments

+
rsp
+

A respeciate object, a data.frame of re(SPECIATE) +profiles.

+ + +
code
+

required character, the unique profile code to assign to the +average profile.

+ + +
name
+

character, the profile name to assign to the average +profile. If not supplied, this defaults to a collapsed list of the codes +of all the profiles averaged.

+ + +
method
+

numeric, the averaging method to apply: Currently only 1 (default) +mean(rsp).

+ + +
...
+

additional arguments, currently ignored

+ +
+
+

Value

+ + +

rsp_average_profile returns a single profile average +version of the supplied respeciate profile.

+
+
+

Note

+

In development function; arguments and outputs likely to be subject to +change.

+

This is one of the very few respeciate functions that modify the +WEIGHT_PERCENT column of the respeciate data.frame.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.build.html b/docs/reference/rsp.build.html new file mode 100644 index 0000000..f9be556 --- /dev/null +++ b/docs/reference/rsp.build.html @@ -0,0 +1,143 @@ + +Building respeciate-like Objects — rsp.build • respeciate + + +
+
+ + + +
+
+ + +
+

rsp function(s) to reconfigure data.frames (and similar +object classes) for use with data and functions in re(SPECIATE).

+
+ +
+
rsp_build_x(
+  x,
+  profile_code,
+  profile_name,
+  species_name,
+  species_id,
+  value,
+  ...
+)
+
+ +
+

Arguments

+
x
+

data.frame or similar (i.e. +something that can be coerced into a data.frame using +as.data.frame) to be converted into a respeciate object.

+ + +
profile_name, profile_code
+

(character) The name of the column +in x containing profile name and code records, respectively. If not +already named according to SPECIATE conventions, at least one of these will +need to be assigned.

+ + +
species_name, species_id
+

(character) The name of the column +in x containing species name and id records, respectively. If not +already named according to SPECIATE conventions, at least one of these will +need to be assigned.

+ + +
value
+

(character) The name of the column in x +containing measurement values. If not already named according to SPECIATE +conventions, this will need to be assigned.

+ + +
...
+

(any other arguments) currently ignored.

+ +
+
+

Value

+ + +

rsp_builds attempt to build and return a (re)SPECIATE-like +object that can be compared with data from re(SPECIATE).

+
+
+

Note

+

If you want to compare your data with profiles in the SPECIATE archive, +you need to use EPA SPECIATE conventions when assigning species names and +identifiers. Currently, we are working on options to improve on this (and +very happy to discuss if anyone has ideas), but current best suggestion is: +(1) identify the SPECIATE species code for each of the species in your data set, +and (2) assign these as species_id when rsp_building. The +function will then associate the species_name from SPECIATE species +records.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.cluster.html b/docs/reference/rsp.cluster.html new file mode 100644 index 0000000..25de91b --- /dev/null +++ b/docs/reference/rsp.cluster.html @@ -0,0 +1,115 @@ + +(re)SPECIATE profile cluster analysis methods — rsp.cluster • respeciate + + +
+
+ + + +
+
+ + +
+

(re)SPECIATE functions for studying similarities (or +dissimilarities) within (re)SPECIATE data sets

+

rsp_distance_profile calculates the statistical distance +between re(SPECIATE) profiles, and clusters profiles according to nearness.

+
+ +
+
rsp_distance_profile(rsp, output = c("plot", "report"))
+
+ +
+

Arguments

+
rsp
+

A respeciate object, a data.frame of re(SPECIATE) +profiles.

+ + +
output
+

Character vector, required function output: 'report' the +calculated distance matrix; 'plot' a heat map of that distance +matrix.

+ +
+
+

Value

+ + +

Depending on the output option, rsp_distance_profile returns +one or more of the following: the distance matrix, a heat map of the +distance matrix.

+
+
+

Note

+

Please note: function in development; structure and arguments may be +subject to change.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.cor.html b/docs/reference/rsp.cor.html new file mode 100644 index 0000000..356602e --- /dev/null +++ b/docs/reference/rsp.cor.html @@ -0,0 +1,157 @@ + +(re)SPECIATE Species Correlations — rsp.cor • respeciate + + +
+
+ + + +
+
+ + +
+

(re)SPECIATE functions for studying relationships between +species in (re)SPECIATE data sets.

+

rsp_cor_species generates a by-species correlation +matrix of the supplied (re)SPECIATE data sets.

+
+ +
+
rsp_cor_species(
+  rsp,
+  min.n = 3,
+  cols = c("#80FFFF", "#FFFFFF", "#FF80FF"),
+  na.col = "#CFCFCF",
+  heatmap.args = TRUE,
+  key.args = TRUE,
+  report = "silent"
+)
+
+ +
+

Arguments

+
rsp
+

respeciate object, a data.frame of re(SPECIATE) +profiles.

+ + +
min.n
+

numeric (default 3), the minimum number of species measurements +needed in a profile for the function to use it in correlation calculations. +Here, it should be noted that this does not guarantee the three matched +pairs of measurements needed to calculate a correlation coefficient because +not all profiles contain all species, so there may still be insufficient +overlap on a case-by-case basis.

+ + +
cols
+

a series of numeric, character or other class values +that can be translated into a color gradient, used to color valid cases when +generating plots and color keys, default c("#80FFFF", "#FFFFFF", "#FF80FF") +equivalent to cm.colors output.

+ + +
na.col
+

numeric, character or other class that can be +translated into a single color, used to color NAs when generating +plots and color keys, default grey "#CFCFCF".

+ + +
heatmap.args
+

logical or list, heat map settings. Options +include TRUE (default) to generate the heat map without modification; +FALSE to not plot it; or a list of heat map options to alter the plot +default appearance. The plot, a standard heat map with the dendrograms +removed, is generated using heatmap, so see associated +documentation for valid options.

+ + +
key.args
+

logical or list, color key settings if plotting +the correlation matrix heat map. Options include TRUE (default) to +generate the key without modification; FALSE to not include the key; +or a list of options to alter the key appearance.

+ + +
report
+

logical or character, the required function +output. Options include: 'silent' (default), to return the +correlation matrix invisibly; TRUE to return the matrix +(visibly); and, FALSE to not return it.

+ +
+
+

Value

+ + +

By default rsp_cor_species invisibly returns the calculated +correlation matrix and plots it as a heat map, but arguments including +heatmap.args and report can be used to modify function outputs.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.html b/docs/reference/rsp.html new file mode 100644 index 0000000..88c2346 --- /dev/null +++ b/docs/reference/rsp.html @@ -0,0 +1,137 @@ + +rsp_profile — rsp • respeciate + + +
+
+ + + +
+
+ + +
+

Getting profile(s) from the R (re)SPECIATE archive

+
+ +
+
rsp_profile(..., include.refs = FALSE)
+
+rsp(...)
+
+ +
+

Arguments

+
...
+

The function assumes all inputs (except include.refs) +are PROFILE_CODEs (the unique descriptor the EPA assigns to all +profiles in SPECIATE) or sources of profile information and requests these +from the local (re)SPECIATE archive. Typically, simple +objects like character and numeric vectors are assumed to be profile codes and +composite data-types like respeciate objects or data.frame, +are assumed to contain a named PROFILE_CODE column. All potential +profile codes are requested and unrecognized codes are ignored.

+ + +
include.refs
+

logical, if profile reference information should be +included when extracting the requested profile(s) from the archive, default +FALSE.

+ +
+
+

Value

+ + +

rsp_profile or the short-hand rsp return an object of +respeciate class, a data.frame containing one or more profiles +from the local (re)SPECIATE archive.

+
+
+

Note

+

The option include.refs adds profile source reference +information to the returned respeciate data set. The default option +is to not include these because some profiles have several associated +references and including these replicates records, once per reference. +respeciate code is written to handle this but if you are developing +your own methods or code and include references in any profile build you may be +biasing some analyses in favor of those multiple-reference profiles unless +you check and account for such cases.

+
+
+

References

+

Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., +Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. +The development and uses of EPA SPECIATE database. +Atmospheric Pollution Research, 1(4), pp.196-206.

+
+ +
+

Examples

+
if (FALSE) {
+x <- rsp_profile(8833, 8850)
+plot(x)}
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.info.html b/docs/reference/rsp.info.html new file mode 100644 index 0000000..6ca207f --- /dev/null +++ b/docs/reference/rsp.info.html @@ -0,0 +1,150 @@ + +re(SPECIATE) information — rsp.info • respeciate + + +
+
+ + + +
+
+ + +
+

Functions that provide (re)SPECIATE +source information. +rsp_info generates a brief version report for the currently installed +(re)SPECIATE data sets. +rsp_profile_info searches the currently installed (re)SPECIATE +data sets for profile records. +rsp_species_info searches the currently installed (re)SPECIATE +data sets for species records.

+
+ +
+
rsp_info()
+
+rsp_profile_info(..., by = "keywords", partial = TRUE)
+
+rsp_find_profile(...)
+
+rsp_species_info(..., by = "species_name", partial = TRUE)
+
+rsp_find_species(...)
+
+ +
+

Arguments

+
...
+

character(s), any search term(s) to use when searching +the local (re)SPECIATE archive for relevant records using +rsp_profile_info or rsp_species_info.

+ + +
by
+

character, the section of the archive to +search, by default 'keywords' for rsp_profile_info and +'species_name' for rsp_species_info.

+ + +
partial
+

logical, if TRUE (default) +rsp_profile_info or rsp_species_info use partial matching.

+ +
+
+

Value

+ + +

rsp_info provides a brief version information report on the +currently installed (re)SPECIATE archive.

+ + +

rsp_profile_info returns a data.frame of +profile information, as a respeciate object. +rsp_species_info returns a data.frame of +species information as a respeciate object.

+
+ +
+

Examples

+
if (FALSE) {
+profile <- "Ethanol"
+pr <- rsp_find_profile(profile)
+pr
+
+species <- "Ethanol"
+sp <- rsp_find_species(species)
+sp}
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.match.html b/docs/reference/rsp.match.html new file mode 100644 index 0000000..85c689d --- /dev/null +++ b/docs/reference/rsp.match.html @@ -0,0 +1,163 @@ + +Find nearest matches from reference set of profiles — rsp.match • respeciate + + +
+
+ + + +
+
+ + +
+

rsp_match_profile compares a supplied species +(re)SPECIATE profile (or similar data set) and a reference set of +supplied profiles and attempts to identify nearest matches on the +basis of similarity.

+
+ +
+
rsp_match_profile(
+  rsp,
+  ref,
+  matches = 10,
+  rescale = 5,
+  min.n = 8,
+  method = "pd",
+  test.rsp = FALSE
+)
+
+ +
+

Arguments

+
rsp
+

A respeciate object or similar data.frame containing +a species profile to be compared with profiles in ref. If rsp +contains more than one profile, these are averaged (using +rsp_average_profile), and the average compared.

+ + +
ref
+

A respeciate object, a data.frame containing +multiple species profiles, to be used as reference library when identifying +nearest matches for rsp.

+ + +
matches
+

Numeric (default 10), the maximum number of profile matches to +report.

+ + +
rescale
+

Numeric (default 5), the data scaling method to apply before +comparing rsp and profiles in ref: options 0 to 5 handled by +rsp_rescale.

+ + +
min.n
+

numeric (default 8), the minimum number of paired +species measurements in two profiles required for a match to be assessed. +See also rsp_cor_species.

+ + +
method
+

Character (default 'pd'), the similarity measure to use, current +options 'pd', the Pearson's Distance (1 - Pearson's correlation coefficient), +or 'sid', the Standardized Identity Distance (See References).

+ + +
test.rsp
+

Logical (default FALSE). The match process self-tests by adding +rsp to ref, which should generate a perfect fit=0 score. Setting +test.rsp to TRUE retains this as an extra record.

+ +
+
+

Value

+ + +

rsp_match_profile returns a fit report: a data.frame of +up to n fit reports for the nearest matches to rsp from the +reference profile data set, ref.

+
+
+

References

+

Distance metrics are based on recommendations by Belis et al (2015) +and as implemented in Mooibroek et al (2022):

+

Belis, C.A., Pernigotti, D., Karagulian, F., Pirovano, G., Larsen, B.R., +Gerboles, M., Hopke, P.K., 2015. A new methodology to assess the performance +and uncertainty of source apportionment models in intercomparison +exercises. Atmospheric Environment, 119, 35–44. +https://doi.org/10.1016/j.atmosenv.2015.08.002.

+

Mooibroek, D., Sofowote, U.M. and Hopke, P.K., 2022. Source apportionment of +ambient PM10 collected at three sites in an urban-industrial area with +multi-time resolution factor analyses. Science of The Total Environment, +850, p.157981. http://dx.doi.org/10.1016/j.scitotenv.2022.157981.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.pad.html b/docs/reference/rsp.pad.html new file mode 100644 index 0000000..8bb0485 --- /dev/null +++ b/docs/reference/rsp.pad.html @@ -0,0 +1,123 @@ + +(re)SPECIATE profile padding functions — rsp.pad • respeciate + + +
+
+ + + +
+
+ + +
+

Functions for padding respeciate objects.

+

rsp_pad pads a supplied (re)SPECIATE profile data set +with profile and species meta-data.

+
+ +
+
rsp_pad(rsp, pad = "standard", drop.nas = TRUE)
+
+ +
+

Arguments

+
rsp
+

A respeciate object, a data.frame of re(SPECIATE) +profiles.

+ + +
pad
+

character, type of meta data padding, current options +'profile', 'species', 'weight', 'reference', +'standard' (default; all but 'reference'), and 'all' +(all).

+ + +
drop.nas
+

logical, discard any rows where WEIGHT_PERCENT is +NA, default TRUE.

+ +
+
+

Value

+ + +

rsp_pad returns supplied respeciate data set, with +requested additional profile and species meta-data added as additional +data.frame columns. See Note.

+
+
+

Note

+

Some data handling can remove (re)SPECIATE meta-data, +and rsp_pads provide a quick rebuild/repair. For example, +rsp_dcasting to a (by-species or by-profile) widened +form strips some meta-data, and padding is used as part of the +rsp_melt_wide to re-add this meta-data +when returning the data set to its standard long form.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.plot.html b/docs/reference/rsp.plot.html new file mode 100644 index 0000000..87b8edb --- /dev/null +++ b/docs/reference/rsp.plot.html @@ -0,0 +1,172 @@ + +plotting (re)SPECIATE profiles — rsp.plot • respeciate + + +
+
+ + + +
+
+ + +
+

General plots for respeciate objects.

+

rsp_plot functions generate plots for supplied +(re)SPECIATE data sets.

+
+ +
+
rsp_plot_profile(
+  rsp,
+  id,
+  multi.profile = "group",
+  order = TRUE,
+  log = FALSE,
+  ...,
+  silent = FALSE
+)
+
+rsp_plot_species(
+  rsp,
+  id,
+  multi.species = "group",
+  order = FALSE,
+  log = FALSE,
+  ...,
+  silent = FALSE
+)
+
+ +
+

Arguments

+
rsp
+

A respeciate object, a data.frame of re(SPECIATE) +profiles.

+ + +
id
+

numeric, the indices of profiles or species to use when +plotting with rsp_plot_profile or rsp_plot_species, +respectively. For example, rsp_plot_profile(rsp, id=1:6) plots +the first 6 profiles in respeciate object rsp.

+ + +
multi.profile
+

character, how rsp_plot_profile should +handle multiple profiles, e.g. 'group' or 'panel' (default +group).

+ + +
order
+

logical, order the species in the +profile(s) by relative abundance before plotting.

+ + +
log
+

logical, log y scale when plotting.

+ + +
...
+

any additional arguments, typically passed on to the lattice +plotting functions.

+ + +
silent
+

logical, hide warnings when generating plots (default +FALSE)

+ + +
multi.species
+

character, like multi.profile in +rsp_plot_profile but for species in rsp_plot_species.

+ +
+
+

Value

+ + +

rsp_plot functions return a graph, plot, etc., usually as a trellis object.

+
+
+

Note

+

These functions are currently in development, so may change.

+
+
+

References

+

Most respeciate plots make extensive use of +lattice and latticeExtra code:

+

Sarkar D (2008). Lattice: Multivariate Data Visualization with R. +Springer, New York. ISBN 978-0-387-75968-5, http://lmdvr.r-forge.r-project.org.

+

Sarkar D, Andrews F (2022). latticeExtra: Extra Graphical Utilities Based +on Lattice. R package version 0.6-30, +https://CRAN.R-project.org/package=latticeExtra.

+

They also incorporate ideas from loa:

+

Ropkins K (2023). loa: various plots, options and add-ins for use with lattice. +R package version 0.2.48.3, https://CRAN.R-project.org/package=loa.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.pls.html b/docs/reference/rsp.pls.html new file mode 100644 index 0000000..b415c60 --- /dev/null +++ b/docs/reference/rsp.pls.html @@ -0,0 +1,239 @@ + +(re)SPECIATE profile Positive Least Squares models — rsp.pls • respeciate + + +
+
+ + + +
+
+ + +
+

Functions for Positive Least Squares (PLS) fitting of +(re)SPECIATE profiles

+

rsp_pls_profile builds PLS models for supplied profile(s) using +the nls function, the 'port' algorithm and a lower +limit of zero for all model outputs to enforce the positive fits. The +modeled profiles are typically from an external source, e.g. a +measurement campaign, and are fit as a linear additive series of reference +profiles, here typically from (re)SPECIATE, to provide a measure of +source apportionment based on the assumption that the profiles in the +reference set are representative of the mix that makes up the modeled +sample. The pls_ functions work with rsp_pls_profile +outputs, and are intended to be used when refining and analyzing +these PLS models. See also pls_plots for PLS model plots.

+
+ +
+
rsp_pls_profile(rsp, ref, power = 1, ...)
+
+pls_report(pls)
+
+pls_test(pls)
+
+pls_fit_species(
+  pls,
+  species,
+  power = 1,
+  refit.profile = TRUE,
+  as.marker = FALSE,
+  drop.missing = FALSE,
+  ...
+)
+
+pls_refit_species(
+  pls,
+  species,
+  power = 1,
+  refit.profile = TRUE,
+  as.marker = FALSE,
+  drop.missing = FALSE,
+  ...
+)
+
+pls_rebuild(
+  pls,
+  species,
+  power = 1,
+  refit.profile = TRUE,
+  as.marker = FALSE,
+  drop.missing = FALSE,
+  ...
+)
+
+ +
+

Arguments

+
rsp
+

A respeciate object, a data.frame of +profiles in standard long form, intended for PLS modelling.

+ + +
ref
+

A respeciate object, a data.frame of +profiles also in standard long form, used as the set of candidate +source profiles when fitting rsp.

+ + +
power
+

A numeric, an additional factor to be added to +weightings when fitting the PLS model. This is applied in the form +weight^power, and increasing this, increases the relative +weighting of the more heavily weighted measurements. Values in the +range 1 - 2.5 are sometimes helpful.

+ + +
...
+

additional arguments, typically ignored or passed on to +nls.

+ + +
pls
+

A rsp_pls_profile output, intended for use with +pls_ functions.

+ + +
species
+

for pls_fit_species, a data.frame of +measurements of an additional species to be fitted to an existing +PLS model, or for pls_refit_species a character vector of the +names of species already included in the model to be refit. Both are +multiple-species wrappers for pls_rebuild, a general-purpose +PLS fitter that only handles single species.

+ + +
refit.profile
+

(for pls_fit_species, pls_refit_species +and pls_rebuild) logical. When fitting a new species (or +refitting an existing species), all other species in the reference +profiles are held 'as is' and the added species is fit to the source +contribution time-series of the previous PLS model. By default, the full PLS +model is then refit using the revised ref source profile to generate +a PLS model based on the revised source profiles (i.e., ref + new species +or ref + refit species). However, this second step can be omitted using +refit.profile=FALSE if you want to use the supplied species +as an indicator rather than a standard member of the apportionment model.

+ + +
as.marker
+

for pls_rebuild, pls_fit_species and +pls_refit_species, logical, default FALSE, when +fitting (or refitting) a species, treat it as a source marker.

+ + +
drop.missing
+

for pls_rebuild, pls_fit_species and +pls_refit_species, logical, default FALSE, when +building or rebuilding a PLS model, discard cases where species +is missing.

+ +
+
+

Value

+ + +

rsp_pls_profile returns a list of nls models, one per +profile/measurement set in rsp. The pls_ functions work with +these outputs. pls_report generates a data.frame of +model outputs, and is used by several of the other pls_

+ + +

functions. pls_fit_species, pls_refit_species and +pls_rebuild return the supplied rsp_pls_profile output, +updated on the basis of the pls_ function action. +pls_plots (documented separately) produce various plots +commonly used in source apportionment studies.

+
+
+

Note

+

This implementation of PLS applies the following modeling constraints:

+

1. It generates a model of rsp that is a positively constrained linear +product of the profiles in ref, so outputs can only be +zero or more. Although the model is generated using nls, +which is a Nonlinear Least Squares (NLS) model, the fitting term applied +in this case is linear.

+

2. The model is fit in the form:

+

\(X_{i,j} = \sum\limits_{k=1}^{K}{N_{i,k} * M_{k,j}} + e_{i,j}\)

+

Where X is the data set of measurements, rsp, M is the data set of + reference profiles, ref, N is the data set of source contributions, + the source apportionment solution, to be solved by minimising e, the error terms.

+

3. The number of species in rsp must be more than the number of +profiles in ref to reduce the likelihood of over-fitting.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.pls.plot.html b/docs/reference/rsp.pls.plot.html new file mode 100644 index 0000000..c29199f --- /dev/null +++ b/docs/reference/rsp.pls.plot.html @@ -0,0 +1,130 @@ + +Plots for use with (re)SPECIATE profile Positive Least Squares models — rsp.pls.plot • respeciate + + +
+
+ + + +
+
+ + +
+

The pls_plot functions are intended for use with PLS models built +using rsp_pls_profile (documented separately). They generate some +plots commonly used with source apportionment model outputs.

+
+ +
+
pls_plot(pls, plot.type = 1, ...)
+
+pls_plot_profile(pls, plot.type = 1, log = FALSE, ...)
+
+pls_plot_species(pls, id, plot.type = 1, ...)
+
+ +
+

Arguments

+
pls
+

A rsp_pls_profile output, intended for use with +pls_ functions.

+ + +
plot.type
+

numeric, the plot type if +multiple options are available.

+ + +
...
+

other arguments, typically passed on to the associated +lattice plot.

+ + +
log
+

(for pls_plot_profile only) logical, if TRUE this +applies 'log' scaling to the primary Y axes of the plot.

+ + +
id
+

numeric or character +identifying the species or profile to plot. If numeric, these are treated +as indices of the species or profile, respectively, in the PLS model; if +character, species is treated as the name of species and profile is treated +as the profile code. Both can be concatenated to produce multiple plots and +the special case id = -1 is a short cut to all species or profiles, +respectively.

+ +
+
+

Value

+ + +

pls_plots produce various plots commonly used in source +apportionment studies.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.q.html b/docs/reference/rsp.q.html new file mode 100644 index 0000000..85c91e6 --- /dev/null +++ b/docs/reference/rsp.q.html @@ -0,0 +1,127 @@ + +Quick access to common (re)SPECIATE subsets. — rsp.q • respeciate + + +
+
+ + + +
+
+ + +
+

rsp_q_ functions are quick access wrappers to commonly +requested (re)SPECIATE subsets.

+
+ +
+
rsp_q_gas()
+
+rsp_q_other()
+
+rsp_q_pm()
+
+rsp_q_pm.ae6()
+
+rsp_q_pm.ae8()
+
+rsp_q_pm.cr1()
+
+rsp_q_pm.simplified()
+
+ +
+

Value

+ + +

rsp_q_ functions typically return a respeciate

+ + +

data.frame of the requested profiles.

+ + +

For example:

+ + +

rsp_q_gas() returns all gaseous profiles in (re)SPECIATE +(PROFILE_TYPE == 'GAS').

+ + +

rsp_q_pm returns all particulate matter (PM) profiles in (re)SPECIATE +not classified as a special PM type (PROFILE_TYPE == 'PM').

+ + +

The special PM types are subsets of profiles intended for special +applications, and these include rsp_q_pm.ae6 (type PM-AE6), +rsp_q_pm.ae8 (type PM-AE8), rsp_q_pm.cr1 (type +PM-CR1), and rsp_q_pm.simplified (type PM-Simplified).

+ + +

rsp_q_other returns all profiles classified as other in (re)SPECIATE +(PROFILE_TYPE == 'OTHER').

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.rescale.html b/docs/reference/rsp.rescale.html new file mode 100644 index 0000000..ed8a92a --- /dev/null +++ b/docs/reference/rsp.rescale.html @@ -0,0 +1,143 @@ + +(re)SPECIATE profile rescaling functions — rsp.rescale • respeciate + + +
+
+ + + +
+
+ + +
+

Functions for rescaling

+

rsp_rescale rescales the percentage weight records in +a supplied (re)SPECIATE profile data set. This can be by profile or species +subsets, and rsp_rescale_profile and rsp_rescale_species provide +short-cuts to these options.

+
+ +
+
rsp_rescale(rsp, method = 2, by = "species")
+
+rsp_rescale_profile(rsp, method = 1, by = "profile")
+
+rsp_rescale_species(rsp, method = 2, by = "species")
+
+ +
+

Arguments

+
rsp
+

A respeciate object, a data.frame of re(SPECIATE) +profiles.

+ + +
method
+

numeric, the rescaling method to apply: + 1 x/total(x); + 2 x/mean(x); + 3 (x-min(x))/(max(x)-min(x)); + 4 (x-mean(x))/sd(x); + 5 x/max(x). +The alternative 0 returns the records to their original +values.

+ + +
by
+

character, when rescaling rsp with +rsp_rescale, the data type to group and rescale, +currently 'species' (default) or 'profile'.

+ +
+
+

Value

+ + +

rsp_rescale, rsp_rescale_profile and rsp_rescale_species return the +respeciate profile with the percentage weight records rescaled using +the requested method. See Note.

+
+
+

Note

+

Data sometimes needs to be normalised, e.g. when applying some +statistical analyses. Rather than modify the EPA records in the +WEIGHT_PERCENT column, respeciate creates a duplicate column +.value which is modified by operations like rsp_rescale_profile +and rsp_rescale_species. This means rescaling is always applied to +the source information, rather than rescaling an already rescaled value, +and the EPA records are retained unaffected. So, the original source +information can be easily recovered.

+
+
+

References

+

Dowle M, Srinivasan A (2023). data.table: Extension of `data.frame`. + R package version 1.14.8, https://CRAN.R-project.org/package=data.table.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.reshape.html b/docs/reference/rsp.reshape.html new file mode 100644 index 0000000..af827eb --- /dev/null +++ b/docs/reference/rsp.reshape.html @@ -0,0 +1,165 @@ + +(re)SPECIATE profile reshaping functions — rsp.reshape • respeciate + + +
+
+ + + +
+
+ + +
+

Functions for reshaping (re)SPECIATE profiles

+

rsp_dcast and rsp_melt_wide reshape supplied +(re)SPECIATE profile(s). rsp_dcast converts these from their supplied +long form to a widened form, dcasting the data set by either species +or profiles depending on the widen setting applied. +rsp_dcast_profile and rsp_dcast_species are wrappers for these +options. rsp_melt_wide attempts to return a previously widened data +set to the original long form.

+
+ +
+
rsp_dcast(rsp, widen = "species")
+
+rsp_dcast_profile(rsp, widen = "profile")
+
+rsp_dcast_species(rsp = rsp, widen = "species")
+
+rsp_melt_wide(rsp, pad = TRUE, drop.nas = TRUE)
+
+ +
+

Arguments

+
rsp
+

A respeciate object, a data.frame of re(SPECIATE) +profiles in standard long form or widened form, for use with +rsp_dcast and rsp_melt_wide, respectively.

+ + +
widen
+

character, when widening rsp with +rsp_dcast, the data type to dcast, +currently 'species' (default) or 'profile'. See Note.

+ + +
pad
+

logical or character, when melting a previously widened +data set, should output be re-populated with species and/or profile +meta-data, discarded when widening. This is currently handled by +rsp_pad. The default TRUE applies standard settings, +so does not include profile sources reference meta-data. (See +rsp_pad for other options).

+ + +
drop.nas
+

logical, when melting a previously widened +data set, should output be stripped of any rows containing empty +weight/value columns. Because not all profiles contain all species, the +dcast/melt process can generate empty rows, and this step +attempts to account for that when working with standard re(SPECIATE) +profiles. It is, however, sometimes useful to check first, e.g. when +building profiles yourself.

+ +
+
+

Value

+ + +

rsp_dcast returns the wide form of the supplied +respeciate profile. rsp_melt_wide

+ + +

returns the (standard) long form of a previously widened profile.

+
+
+

Note

+

Conventional long-to-wide reshaping of data, or dcasting, can +be slow and memory inefficient. So, respeciate uses the +data.table::dcast +method. The rsp_dcast_species method, +applied using widen='species', is effectively:

+

dcast(..., PROFILE_CODE+PROFILE_NAME~SPECIES_NAME, value.var="WEIGHT_PERCENT")

+

And, the alternative widen='profile':

+

dcast(..., SPECIES_ID+SPECIES_NAME~PROFILE_CODE, value.var="WEIGHT_PERCENT")

+

Although, respeciate uses a local version of WEIGHT_PERCENT called +.value, so the EPA source information can easily be recovered. See also +rsp_rescale_profile.

+
+
+

References

+

Dowle M, Srinivasan A (2023). data.table: Extension of `data.frame`. + R package version 1.14.8, https://CRAN.R-project.org/package=data.table.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rsp.x.html b/docs/reference/rsp.x.html new file mode 100644 index 0000000..b8600c7 --- /dev/null +++ b/docs/reference/rsp.x.html @@ -0,0 +1,144 @@ + +rsp_x_ functions for grouping and subsetting (re)SPECIATE profiles — rsp.x • respeciate + + +
+
+ + + +
+
+ + +
+

rsp_x_ functions generate a vector of assignment +terms and can be used to subset or condition a supplied (re)SPECIATE +data.frame.

+

Most commonly, the rsp_x_ functions accept a single input, a +(re)SPECIATE data.frame and return a logical vector of +length nrow(rsp), identifying species of interest as +TRUE. So, for example, they can be used when +subsetting in the form:

+

subset(rsp, rsp_x_nalkane(rsp))

+

... to extract just n-alkane records from a supplied respeciate +object rsp.

+

However, some accept additional arguments. For example, rsp_x_copy +also accepts a reference data set, ref, and a column identifier, +by, and tests rsp$by %in% unique(ref$by).

+
+ +
+
rsp_x_copy(rsp, ref = NULL, by = "species_id")
+
+rsp_x_nalkane(rsp)
+
+rsp_x_btex(rsp)
+
+ +
+

Arguments

+
rsp
+

a respeciate object, a data.frame of (re)SPECIATE +profiles.

+ + +
ref
+

(rsp_x_copy only) a second respeciate object, to +be used as reference when subsetting (or conditioning) rsp.

+ + +
by
+

(rsp_x_copy only) character, the name of the column +in ref to copy when subsetting (or conditioning) rsp.

+ +
+
+

Value

+ + +

rsp_x_copy outputs can be modified but, by default, it +identifies all species in the supplied reference data set.

+ + +

rsp_x_nalkane identifies (straight chain) C1 to C40 n-alkanes.

+ + +

rsp_x_btex identifies the BTEX group of aromatic hydrocarbons +(benzene, toluene, ethyl benzene, and M-, O- and P-xylene).

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/reference/sysdata.html b/docs/reference/sysdata.html index 020a396..8cea5b8 100644 --- a/docs/reference/sysdata.html +++ b/docs/reference/sysdata.html @@ -19,7 +19,7 @@ respeciate - 0.2.6 + 0.3.0 diff --git a/man/figures/unnamed-chunk-6-1.png b/man/figures/unnamed-chunk-6-1.png index f4a086f..ce01d3b 100644 Binary files a/man/figures/unnamed-chunk-6-1.png and b/man/figures/unnamed-chunk-6-1.png differ diff --git a/man/find_code.Rd b/man/find_code.Rd deleted file mode 100644 index 827dfdd..0000000 --- a/man/find_code.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/speciate.R -\name{find_code} -\alias{find_code} -\title{Find PROFILE_CODE} -\usage{ -find_code(profile, by = "Keywords") -} -\arguments{ -\item{profile}{Character, to search PROFILE CODE} - -\item{by}{Character, to search code. eg: "Keywords", "PROFILE_NOTES", "PROFILE_TYPE" -or other name of PROFILES} -} -\value{ -a data.frame with with profile codes -} -\description{ -\code{\link{find_code}} Return a data.frame with profile codes -} -\examples{ -\dontrun{ -profile <- "Ethanol" -dt <- find_code(profile) -} -} -\references{ -Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., -Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. -The development and uses of EPA SPECIATE database. -Atmospheric Pollution Research, 1(4), pp.196-206. -} diff --git a/man/respeciate-package.Rd b/man/respeciate-package.Rd index 2dde598..b92bef7 100644 --- a/man/respeciate-package.Rd +++ b/man/respeciate-package.Rd @@ -8,7 +8,7 @@ \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -Acess to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for gases and particles. More details in Simon et al (2010) \doi{10.5094/APR.2010.026}. +Access to the US.EPA Speciate (v5.2) tool, to generate speciation profiles for gases and particles. 
More details in Simon et al (2010) \doi{10.5094/APR.2010.026}. } \seealso{ Useful links: diff --git a/man/respeciate.generics.Rd b/man/respeciate.generics.Rd index cd90be2..cc5471b 100644 --- a/man/respeciate.generics.Rd +++ b/man/respeciate.generics.Rd @@ -2,35 +2,48 @@ % Please edit documentation in R/respeciate.generics.R \name{respeciate.generics} \alias{respeciate.generics} +\alias{as.respeciate} +\alias{as.respeciate.default} \alias{print.respeciate} \alias{print.rsp_pls} \alias{plot.respeciate} +\alias{plot.rsp_pls} \alias{summary.respeciate} \title{respeciate.generics} \usage{ -\method{print}{respeciate}(x, n = NULL, ...) +as.respeciate(x, ...) + +\method{as.respeciate}{default}(x, ...) + +\method{print}{respeciate}(x, n = 6, ...) \method{print}{rsp_pls}(x, n = NULL, ...) \method{plot}{respeciate}(x, ...) +\method{plot}{rsp_pls}(x, ...) + \method{summary}{respeciate}(object, ...) } \arguments{ \item{x}{the \code{respeciate} object to be printed, plotted, etc.} +\item{...}{any extra arguments, mostly ignored except by +\code{plot} which passes them to \code{\link{rsp_plot_profile}}.} + \item{n}{when plotting or printing a multi-profile object, the maximum number of profiles to report.} -\item{...}{any extra arguments, mostly ignored except by -\code{plot} which passes them to \code{\link{sp_plot_profile}}.} - \item{object}{like \code{x} but for \code{summary}.} } \description{ \code{respeciate} object classes and generic functions. +When supplied a \code{data.frame} or similar, +\code{\link{as.respeciate}} attempts to coerce it into a +\code{respeciate} object. + When supplied a \code{respeciate} object or similar, \code{\link{print}} manages its appearance. 
diff --git a/man/rsp.Rd b/man/rsp.Rd new file mode 100644 index 0000000..e0c1e78 --- /dev/null +++ b/man/rsp.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rsp.R +\name{rsp} +\alias{rsp} +\alias{rsp_profile} +\title{rsp_profile} +\usage{ +rsp_profile(..., include.refs = FALSE) + +rsp(...) +} +\arguments{ +\item{...}{The function assumes all inputs (except \code{include.refs}) +are \code{SPECIES_CODE}s (the unique descriptor the EPA assigns to all +profiles in SPECIATE) or sources of profile information and requests these +form the local (re)SPECIATE archive. Typically, simple +objects like character and numeric vectors, as assumed to profile codes and +composite data-types like \code{respeciate} objects or \code{data.frame}, +are assumed to contain a named \code{PROFILE_CODE} column. All potential +profile codes are requested and unrecognized codes are ignored.} + +\item{include.refs}{logical, if profile reference information should be +included when extracting the requested profile(s) from the archive, default +\code{FALSE}.} +} +\value{ +\code{rsp_profile} or the short-hand \code{rsp} return an object of +\code{respeciate} class, a \code{data.frame} containing one or more profile +from the local (re)SPECIATE archive. +} +\description{ +Getting profile(s) from the R (re)SPECIATE archive +} +\note{ +The option \code{include.refs} adds profile source reference +information to the returned \code{respeciate} data set. The default option +is to not include these because some profiles have several associated +references and including these replicates records, once per reference. +\code{respeciate} code is written to handle this but if you are developing +own methods or code and include references in any profile build you may be +biasing some analyses in favor of those multiple-reference profile unless +you check and account such cases. 
+} +\examples{ +\dontrun{ +x <- rsp_profile(8833, 8850) +plot(x)} +} +\references{ +Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., +Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. +The development and uses of EPA SPECIATE database. +Atmospheric Pollution Research, 1(4), pp.196-206. +} diff --git a/man/sp.average.Rd b/man/rsp.average.Rd similarity index 67% rename from man/sp.average.Rd rename to man/rsp.average.Rd index a2a967c..acaaa5d 100644 --- a/man/sp.average.Rd +++ b/man/rsp.average.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.average.R -\name{sp.average} -\alias{sp.average} -\alias{sp_average_profile} -\title{speciate data averaging functions} +% Please edit documentation in R/rsp.average.R +\name{rsp.average} +\alias{rsp.average} +\alias{rsp_average_profile} +\title{(re)SPECIATE data averaging functions} \usage{ -sp_average_profile(x, code = NULL, name = NULL, method = 1, ...) +rsp_average_profile(rsp, code = NULL, name = NULL, method = 1, ...) } \arguments{ -\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +\item{rsp}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) profiles.} \item{code}{required character, the unique profile code to assign to the @@ -19,18 +19,18 @@ profile. If not supplied, this defaults to a collapsed list of the codes of all the profiles averaged.} \item{method}{numeric, the averaging method to apply: Currently only 1 (default) -\code{mean(x)}.} +\code{mean(rsp)}.} \item{...}{additional arguments, currently ignored} } \value{ -\code{sp_average_profile} returns a single profile average +\code{rsp_average_profile} returns a single profile average version of the supplied \code{respeciate} profile. 
} \description{ Functions to build composite (re)SPECIATE profiles -\code{sp_average_profile} generates an average composite +\code{rsp_average_profile} generates an average composite of a supplied multi-profile \code{respeciate} object. } \note{ diff --git a/man/rsp.build.Rd b/man/rsp.build.Rd new file mode 100644 index 0000000..634ac7d --- /dev/null +++ b/man/rsp.build.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rsp.build.R +\name{rsp.build} +\alias{rsp.build} +\alias{rsp_build_x} +\title{Building respeciate-like Objects} +\usage{ +rsp_build_x( + x, + profile_code, + profile_name, + species_name, + species_id, + value, + ... +) +} +\arguments{ +\item{x}{\code{data.frame} or similar (i.e. +something that can be coerced into a \code{data.frame} using +\code{as.data.frame}) to be converted into a \code{respeciate} object.} + +\item{profile_name, profile_code}{(\code{character}) The name of the column +in \code{x} containing profile name and code records, respectively. If not +already named according to SPECIATE conventions, at least one of these will +need to be assigned.} + +\item{species_name, species_id}{(\code{character}) The name of the column +in \code{x} containing species name and id records, respectively. If not +already named according to SPECIATE conventions, at least one of these will +need to be assigned.} + +\item{value}{(\code{character}) The name of the column in \code{x} +containing measurement values. If not already named according to SPECIATE +conventions, this will need to be assigned.} + +\item{...}{(any other arguments) currently ignored.} +} +\value{ +\code{rsp_build}s attempt to build and return a (re)SPECIATE-like +object that can be compared with data from re(SPECIATE). +} +\description{ +rsp function(s) to reconfigure data.frames (and similar +object classes) for use with data and functions in re(SPECIATE). 
+} +\note{ +If you want to compare your data with profiles in the SPECIATE archive, +you need to use EPA SPECIATE conventions when assigning species names and +identifiers. Currently, we are working on options to improve on this (and +are very happy to discuss if anyone has ideas), but the current best suggestion is: +(1) identify the SPECIATE species code for each of the species in your data set, +and (2) assign these as \code{species_id} when \code{rsp_build}ing. The +function will then associate the \code{species_name} from SPECIATE species +records. +} diff --git a/man/sp.cluster.Rd b/man/rsp.cluster.Rd similarity index 50% rename from man/sp.cluster.Rd rename to man/rsp.cluster.Rd index 0e51b66..b4882e2 100644 --- a/man/sp.cluster.Rd +++ b/man/rsp.cluster.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.cluster.R -\name{sp.cluster} -\alias{sp.cluster} -\alias{sp_profile_distance} -\title{sp_profile clustering} +% Please edit documentation in R/rsp.cluster.R +\name{rsp.cluster} +\alias{rsp.cluster} +\alias{rsp_distance_profile} +\title{(re)SPECIATE profile cluster analysis methods} \usage{ -sp_profile_distance(x, output = c("plot", "report")) +rsp_distance_profile(rsp, output = c("plot", "report")) } \arguments{ -\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +\item{rsp}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) profiles.} \item{output}{Character vector, required function output: \code{'report'} the @@ -16,15 +16,15 @@ calculated distance matrix; \code{'plot'} a heat map of that distance matrix.} } \value{ -Depending on the \code{output} option, \code{sp_profile_distance} returns +Depending on the \code{output} option, \code{rsp_distance_profile} returns one or more of the following: the correlation matrix, a heat map of the correlation matrix.
} \description{ -sp_profile functions for studying similarities (or -dissimilarities) within multi-profile (re)SPECIATE data sets +(re)SPECIATE functions for studying similarities (or +dissimilarities) within (re)SPECIATE data sets -\code{\link{sp_profile_distance}} calculates the statistical distance +\code{\link{rsp_distance_profile}} calculates the statistical distance between re(SPECIATE) profiles, and clusters profiles according to nearness. } \note{ diff --git a/man/sp.cor.Rd b/man/rsp.cor.Rd similarity index 84% rename from man/sp.cor.Rd rename to man/rsp.cor.Rd index 046f5e3..947e551 100644 --- a/man/sp.cor.Rd +++ b/man/rsp.cor.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.cor.R -\name{sp.cor} -\alias{sp.cor} -\alias{sp_species_cor} +% Please edit documentation in R/rsp.cor.R +\name{rsp.cor} +\alias{rsp.cor} +\alias{rsp_cor_species} \title{(re)SPECIATE Species Correlations} \usage{ -sp_species_cor( - x, +rsp_cor_species( + rsp, min.n = 3, cols = c("#80FFFF", "#FFFFFF", "#FF80FF"), na.col = "#CFCFCF", @@ -16,7 +16,7 @@ sp_species_cor( ) } \arguments{ -\item{x}{\code{respeciate} object, a \code{data.frame} of re(SPECIATE) +\item{rsp}{\code{respeciate} object, a \code{data.frame} of re(SPECIATE) profiles.} \item{min.n}{\code{numeric} (default 3), the minimum number of species measurements @@ -53,14 +53,14 @@ correlation matrix invisibly; \code{TRUE} to return the matrix (visibly); and, \code{FALSE} to not return it.} } \value{ -By default \code{sp_species_cor} invisibly returns the calculated +By default \code{rsp_cor_species} invisibly returns the calculated correlation matrix a plots it as a heat map, but arguments including \code{heatmap} and \code{report} can be used to modify function outputs. } \description{ -sp_species functions for studying relationships between -species in multi-profile (re)SPECIATE data sets. 
+(re)SPECIATE functions for studying relationships between +species in (re)SPECIATE data sets. -\code{\link{sp_species_cor}} generates a by-species correlation +\code{\link{rsp_cor_species}} generates a by-species correlation matrix of the supplied (re)SPECIATE data sets. } diff --git a/man/rsp.info.Rd b/man/rsp.info.Rd new file mode 100644 index 0000000..acf71fe --- /dev/null +++ b/man/rsp.info.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rsp.info.R +\name{rsp.info} +\alias{rsp.info} +\alias{rsp_info} +\alias{rsp_profile_info} +\alias{rsp_species_info} +\alias{rsp_find_profile} +\alias{rsp_find_species} +\title{re(SPECIATE) information} +\usage{ +rsp_info() + +rsp_profile_info(..., by = "keywords", partial = TRUE) + +rsp_find_profile(...) + +rsp_species_info(..., by = "species_name", partial = TRUE) + +rsp_find_species(...) +} +\arguments{ +\item{...}{character(s), any search term(s) to use when searching +the local (re)SPECIATE archive for relevant records using +\code{rsp_profile_info} or \code{rsp_species_info}.} + +\item{by}{character, the section of the archive to +search, by default \code{'keywords'} for \code{rsp_profile_info} and +\code{'species_name'} for \code{rsp_species_info}.} + +\item{partial}{logical, if \code{TRUE} (default) +\code{rsp_profile_info} or \code{rsp_species_info} use partial matching.} +} +\value{ +\code{rsp_info} provides a brief version information report on the +currently installed (re)SPECIATE archive. + +\code{rsp_profile_info} returns a \code{data.frame} of +profile information, as a \code{respeciate} object. +\code{rsp_species_info} returns a \code{data.frame} of +species information as a \code{respeciate} object. +} +\description{ +Functions that provide (re)SPECIATE +source information. +\code{rsp_info} generates a brief version report for the currently installed +(re)SPECIATE data sets.
+\code{rsp_profile_info} searches the currently installed (re)SPECIATE +data sets for profile records. +\code{rsp_species_info} searches the currently installed (re)SPECIATE +data sets for species records. +} +\examples{ +\dontrun{ +profile <- "Ethanol" +pr <- rsp_find_profile(profile) +pr + +species <- "Ethanol" +sp <- rsp_find_species(species) +sp} + +} diff --git a/man/sp.match.Rd b/man/rsp.match.Rd similarity index 60% rename from man/sp.match.Rd rename to man/rsp.match.Rd index b8b7fe8..988527d 100644 --- a/man/sp.match.Rd +++ b/man/rsp.match.Rd @@ -1,59 +1,59 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.match.R -\name{sp.match} -\alias{sp.match} -\alias{sp_match_profile} +% Please edit documentation in R/rsp.match.R +\name{rsp.match} +\alias{rsp.match} +\alias{rsp_match_profile} \title{Find nearest matches from reference set of profiles} \usage{ -sp_match_profile( - x, +rsp_match_profile( + rsp, ref, matches = 10, rescale = 5, min.n = 8, method = "pd", - test.x = FALSE + test.rsp = FALSE ) } \arguments{ -\item{x}{A \code{respeciate} object or similar \code{data.frame} containing -a species profile to be compared with profiles in \code{ref}. If \code{x} +\item{rsp}{A \code{respeciate} object or similar \code{data.frame} containing +a species profile to be compared with profiles in \code{ref}. 
If \code{rsp} contains more than one profile, these are averaged (using -\code{\link{sp_average_profile}}), and the average compared.} +\code{\link{rsp_average_profile}}), and the average compared.} \item{ref}{A \code{respeciate} object, a \code{data.frame} containing a multiple species profiles, to be used as reference library when identifying -nearest matches for \code{x}.} +nearest matches for \code{rsp}.} \item{matches}{Numeric (default 10), the maximum number of profile matches to report.} \item{rescale}{Numeric (default 5), the data scaling method to apply before -comparing \code{x} and profiles in \code{ref}: options 0 to 5 handled by -\code{\link{sp_rescale}}.} +comparing \code{rsp} and profiles in \code{ref}: options 0 to 5 handled by +\code{\link{rsp_rescale}}.} \item{min.n}{\code{numeric} (default 8), the minimum number of paired species measurements in two profiles required for a match to be assessed. -See also \code{\link{sp_species_cor}}.} +See also \code{\link{rsp_cor_species}}.} \item{method}{Character (default 'pd'), the similarity measure to use, current -options 'pd', the Pearson's Distance (1- Pearson's correlation coefficient), +options 'pd', the Pearson's Distance (1 - Pearson's correlation coefficient), or 'sid', the Standardized Identity Distance (See References).} -\item{test.x}{Logical (default FALSE). The match process self-tests by adding -\code{x} to \code{ref}, which should generate a perfect fit=0 score. Setting -\code{test.x} to \code{TRUE} retains this as an extra record.} +\item{test.rsp}{Logical (default FALSE). The match process self-tests by adding +\code{rsp} to \code{ref}, which should generate a perfect fit=0 score. 
Setting +\code{test.rsp} to \code{TRUE} retains this as an extra record.} } \value{ -\code{sp_match_profile} returns a fit report: a \code{data.frame} of -up to \code{n} fit reports for the nearest matches to \code{x} from the +\code{rsp_match_profile} returns a fit report: a \code{data.frame} of +up to \code{matches} fit reports for the nearest matches to \code{rsp} from the reference profile data set, \code{ref}. } \description{ -\code{sp_match_profile} compares a supplied species -(re)SPECIATE profile and a reference set of supplied profiles and -attempt to identify nearest matches on the basis of correlation -coefficient. +\code{rsp_match_profile} compares a supplied species +(re)SPECIATE profile (or similar data set) and a reference set of +supplied profiles and attempts to identify nearest matches on the +basis of similarity. } \references{ Distance metrics are based on recommendations by Belis et al (2015) diff --git a/man/rsp.pad.Rd b/man/rsp.pad.Rd new file mode 100644 index 0000000..da4eeca --- /dev/null +++ b/man/rsp.pad.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rsp.pad.R +\name{rsp.pad} +\alias{rsp.pad} +\alias{rsp_pad} +\title{(re)SPECIATE profile padding functions} +\usage{ +rsp_pad(rsp, pad = "standard", drop.nas = TRUE) +} +\arguments{ +\item{rsp}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +profiles.} + +\item{pad}{character, type of meta data padding, current options +\code{'profile'}, \code{'species'}, \code{'weight'}, \code{'reference'}, +\code{'standard'} (default; all but \code{'reference'}), and \code{'all'} +(all).} + +\item{drop.nas}{logical, discard any rows where \code{WEIGHT_PERCENT} is +\code{NA}, default \code{TRUE}.} +} +\value{ +\code{rsp_pad} returns the supplied \code{respeciate} data set, with +requested additional profile and species meta-data added as additional +\code{data.frame} columns. See Note.
+} +\description{ +Functions for padding \code{respeciate} objects. + +\code{rsp_pad} pads a supplied (re)SPECIATE profile data set +with profile and species meta-data. +} +\note{ +Some data handling can remove (re)SPECIATE meta-data, +and \code{rsp_pad}s provide a quick rebuild/repair. For example, +\code{\link{rsp_dcast}}ing to a (by-species or by-profile) widened +form strips some meta-data, and padding is used as part of the +\code{\link{rsp_melt_wide}} to re-add this meta-data +when returning the data set to its standard long form. +} diff --git a/man/sp.plot.Rd b/man/rsp.plot.Rd similarity index 75% rename from man/sp.plot.Rd rename to man/rsp.plot.Rd index dfcecd3..fefe2c3 100644 --- a/man/sp.plot.Rd +++ b/man/rsp.plot.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.plot.R -\name{sp.plot} -\alias{sp.plot} -\alias{sp_plot_profile} -\alias{sp_plot_species} +% Please edit documentation in R/rsp.plot.R +\name{rsp.plot} +\alias{rsp.plot} +\alias{rsp_plot_profile} +\alias{rsp_plot_species} \title{plotting (re)SPECIATE profiles} \usage{ -sp_plot_profile( - x, +rsp_plot_profile( + rsp, id, multi.profile = "group", order = TRUE, @@ -16,8 +16,8 @@ sp_plot_profile( silent = FALSE ) -sp_plot_species( - x, +rsp_plot_species( + rsp, id, multi.species = "group", order = FALSE, @@ -27,15 +27,15 @@ sp_plot_species( ) } \arguments{ -\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +\item{rsp}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) profiles.} \item{id}{numeric, the indices of profiles or species to use when -plotting with \code{sp_plot_profile} or \code{sp_plot_species}, -respectively. For example, \code{sp_plot_profile(x, id=1:6)} plots -first 6 profiles in \code{respeciate} object \code{x}.} +plotting with \code{rsp_plot_profile} or \code{rsp_plot_species}, +respectively. 
For example, \code{rsp_plot_profile(rsp, id=1:6)} plots +first 6 profiles in \code{respeciate} object \code{rsp}.} -\item{multi.profile}{character, how \code{sp_plot_profile} should +\item{multi.profile}{character, how \code{rsp_plot_profile} should handle multiple profiles, e.g. 'group' or 'panel' (default group).} @@ -59,7 +59,7 @@ plotting functions.} \description{ General plots for \code{respeciate} objects. -\code{sp_plot} functions generate plots for supplied +\code{rsp_plot} functions generate plots for supplied (re)SPECIATE data sets. } \note{ diff --git a/man/sp.pls.Rd b/man/rsp.pls.Rd similarity index 68% rename from man/sp.pls.Rd rename to man/rsp.pls.Rd index 1c9850c..b1f86c8 100644 --- a/man/sp.pls.Rd +++ b/man/rsp.pls.Rd @@ -1,19 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.pls.R -\name{sp.pls} -\alias{sp.pls} -\alias{sp_pls_profile} +% Please edit documentation in R/rsp.pls.R +\name{rsp.pls} +\alias{rsp.pls} +\alias{rsp_pls_profile} \alias{pls_report} \alias{pls_test} \alias{pls_fit_species} \alias{pls_refit_species} \alias{pls_rebuild} -\alias{pls_plot} -\alias{pls_plot_species} -\alias{pls_plot_profile} -\title{(re)SPECIATE profile Positive Least Squares} +\title{(re)SPECIATE profile Positive Least Squares models} \usage{ -sp_pls_profile(x, ref, power = 1, ...) +rsp_pls_profile(rsp, ref, power = 1, ...) pls_report(pls) @@ -48,20 +45,14 @@ pls_rebuild( drop.missing = FALSE, ... ) - -pls_plot(pls, n, type = 1, ...) - -pls_plot_species(pls, n, type = 1, ...) - -pls_plot_profile(pls, n, log = FALSE, ...) 
} \arguments{ -\item{x}{A \code{respeciate} object, a \code{data.frame} of +\item{rsp}{A \code{respeciate} object, a \code{data.frame} of profiles in standard long form, intended for PLS modelling.} \item{ref}{A \code{respeciate} object, a \code{data.frame} of profiles also in standard long form, used as the set of candidate -source profiles when fitting \code{x}.} +source profiles when fitting \code{rsp}.} \item{power}{A numeric, an additional factor to be added to weightings when fitting the PLS model. This is applied in the form @@ -72,8 +63,8 @@ range \code{1 - 2.5} are sometimes helpful.} \item{...}{additional arguments, typically ignored or passed on to \code{\link{nls}}.} -\item{pls}{A \code{sp_pls_profile} output, only used by \code{pls_} -functions.} +\item{pls}{A \code{rsp_pls_profile} output, intended for use with +\code{pls_} functions.} \item{species}{for \code{pls_fit_species}, a data.frame of measurements of an additional species to be fitted to an existing @@ -101,37 +92,23 @@ fitting (or refitting) a species, treat it as source marker.} \code{pls_refit_species}, \code{logical}, default \code{FALSE}, when building or rebuilding a PLS model, discard cases where \code{species} is missing.} - -\item{n}{(for \code{pls_plot}s only) numeric or character -identifying the species or profile to plot. If numeric, these are treated -as indices of the species or profile, respectively, in the PLS model; if -character, species is treated as the name of species and profile is treated -as the profile code. 
Both can be concatenated to produce multiple plots and -the special case \code{n = -1} is a short cut to all species or profiles, -respectively.} - -\item{type}{(for \code{pls_plot}s only) numeric, the plot type if -multiple options are available.} - -\item{log}{(for \code{pls_plot_profile} only) logical, if \code{TRUE} this -applies 'log' scaling to the primary Y axes of the plot.} } \value{ -\code{sp_pls_profile} returns a list of nls models, one per -profile/measurement set in \code{x}. The \code{pls_} functions work with +\code{rsp_pls_profile} returns a list of nls models, one per +profile/measurement set in \code{rsp}. The \code{pls_} functions work with these outputs. \code{pls_report} generates a \code{data.frame} of model outputs, and is used of several of the other \code{pls_} functions. \code{pls_fit_species}, \code{pls_refit_species} and -\code{pls_fit_parent} return the supplied \code{sp_pls_profile} output, +\code{pls_fit_parent} return the supplied \code{rsp_pls_profile} output, updated on the basis of the \code{pls_} function action. -\code{pls_plot}s produce various plots commonly used in source -apportionment studies. +\code{pls_plot}s (documented separately) produce various plots +commonly used in source apportionment studies. } \description{ Functions for Positive Least Squares (PSL) fitting of (re)SPECIATE profiles -\code{sp_pls_profile} builds PSL models for supplied profile(s) using +\code{rsp_pls_profile} builds PSL models for supplied profile(s) using the \code{\link{nls}} function, the 'port' algorithm and a lower limit of zero for all model outputs to enforce the positive fits. The modeled profiles are typically from an external source, e.g. 
a @@ -139,19 +116,27 @@ measurement campaign, and are fit as a linear additive series of reference profiles, here typically from (re)SPECIATE, to provide a measure of source apportionment based on the assumption that the profiles in the reference set are representative of the mix that make up the modeled -sample. The \code{pls_} functions work with \code{sp_pls_profile} +sample. The \code{pls_} functions work with \code{rsp_pls_profile} outputs, and are intended to be used when refining and analyzing -these PLS models. +these PLS models. See also \code{pls_plot}s for PLS model plots. } \note{ This implementation of PLS applies the following modeling constraints: -1. It generates a model of \code{x} that is positively constrained linear +1. It generates a model of \code{rsp} that is a positively constrained linear product of the profiles in \code{ref}, so outputs can only be zero or more. Although the model is generated using \code{\link{nls}}, which is a Nonlinear Least Squares (NLS) model, the fitting term applied in this case is linear. -2. The number of species in \code{x} must be more that the number of +2. The model is fit in the form: + + \eqn{X_{i,j} = \sum\limits_{k=1}^{K}{N_{i,k} * M_{k,j}} + e_{i,j}} + + Where X is the data set of measurements, \code{rsp}, M is the data set of + reference profiles, \code{ref}, N is the data set of source contributions, + the source apportionment solution, to be solved by minimising e, the error terms. + +3. The number of species in \code{rsp} must be more than the number of profiles in \code{ref} to reduce the likelihood of over-fitting.
} diff --git a/man/rsp.pls.plot.Rd b/man/rsp.pls.plot.Rd new file mode 100644 index 0000000..39783f7 --- /dev/null +++ b/man/rsp.pls.plot.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rsp.pls.plot.R +\name{rsp.pls.plot} +\alias{rsp.pls.plot} +\alias{pls_plot} +\alias{pls.plot} +\alias{pls_plot_species} +\alias{pls_plot_profile} +\title{Plots for use with (re)SPECIATE profile Positive Least Squares models} +\usage{ +pls_plot(pls, plot.type = 1, ...) + +pls_plot_profile(pls, plot.type = 1, log = FALSE, ...) + +pls_plot_species(pls, id, plot.type = 1, ...) +} +\arguments{ +\item{pls}{A \code{rsp_pls_profile} output, intended for use with +\code{pls_} functions.} + +\item{plot.type}{numeric, the plot type if +multiple options are available.} + +\item{...}{other arguments, typically passed on to the associated +\code{lattice} plot.} + +\item{log}{(for \code{pls_plot_profile} only) logical, if \code{TRUE} this +applies 'log' scaling to the primary Y axes of the plot.} + +\item{id}{numeric or character +identifying the species or profile to plot. If numeric, these are treated +as indices of the species or profile, respectively, in the PLS model; if +character, species is treated as the name of species and profile is treated +as the profile code. Both can be concatenated to produce multiple plots and +the special case \code{id = -1} is a short cut to all species or profiles, +respectively.} +} +\value{ +\code{pls_plot}s produce various plots commonly used in source +apportionment studies. +} +\description{ +The \code{pls_plot} functions are intended for use with PLS models built +using \code{rsp_pls_profile} (documented separately). They generate some +plots commonly used with source apportionment model outputs.
+} diff --git a/man/rsp.q.Rd b/man/rsp.q.Rd new file mode 100644 index 0000000..86bde1b --- /dev/null +++ b/man/rsp.q.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rsp.q.R +\name{rsp.q} +\alias{rsp.q} +\alias{rsp_q_gas} +\alias{rsp_q} +\alias{rsp_q_other} +\alias{rsp_q_pm} +\alias{rsp_q_pm.ae6} +\alias{rsp_q_pm.ae8} +\alias{rsp_q_pm.cr1} +\alias{rsp_q_pm.simplified} +\title{Quick access to common (re)SPECIATE subsets.} +\usage{ +rsp_q_gas() + +rsp_q_other() + +rsp_q_pm() + +rsp_q_pm.ae6() + +rsp_q_pm.ae8() + +rsp_q_pm.cr1() + +rsp_q_pm.simplified() +} +\value{ +\code{rsp_q_} functions typically return a \code{respeciate} +\code{data.frame} of the requested profiles. + +For example: + +\code{rsp_q_gas()} returns all gaseous profiles in (re)SPECIATE +(\code{PROFILE_TYPE == 'GAS'}). + +\code{rsp_q_pm} returns all particulate matter (PM) profiles in (re)SPECIATE +not classified as a special PM type (\code{PROFILE_TYPE == 'PM'}). + +The special PM types are subsets of profiles intended for special +applications, and these include \code{rsp_q_pm.ae6} (type \code{PM-AE6}), +\code{rsp_q_pm.ae8} (type \code{PM-AE8}), \code{rsp_q_pm.cr1} (type +\code{PM-CR1}), and \code{rsp_q_pm.simplified} (type \code{PM-Simplified}). + +\code{rsp_q_other} returns all profiles classified as other in (re)SPECIATE +(\code{PROFILE_TYPE == 'OTHER'}). +} +\description{ +\code{rsp_q_} functions are quick access wrappers to commonly +requested (re)SPECIATE subsets.
+} diff --git a/man/sp.rescale.Rd b/man/rsp.rescale.Rd similarity index 75% rename from man/sp.rescale.Rd rename to man/rsp.rescale.Rd index a04852a..f127aae 100644 --- a/man/sp.rescale.Rd +++ b/man/rsp.rescale.Rd @@ -1,20 +1,23 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.rescale.R -\name{sp.rescale} -\alias{sp.rescale} +% Please edit documentation in R/rsp.rescale.R +\name{rsp.rescale} +\alias{rsp.rescale} +\alias{rsp_rescale} \alias{sp_rescale} \alias{sp_rescale_profile} \alias{sp_rescale_species} +\alias{rsp_rescale_profile} +\alias{rsp_rescale_species} \title{(re)SPECIATE profile rescaling functions} \usage{ -sp_rescale(x, method = 2, by = "species") +rsp_rescale(rsp, method = 2, by = "species") -sp_rescale_profile(x, method = 1, by = "profile") +rsp_rescale_profile(rsp, method = 1, by = "profile") -sp_rescale_species(x, method = 2, by = "species") +rsp_rescale_species(rsp, method = 2, by = "species") } \arguments{ -\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +\item{rsp}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) profiles.} \item{method}{numeric, the rescaling method to apply: @@ -38,9 +41,9 @@ the requested method. See Note. \description{ Functions for rescaling -\code{sp_rescale} rescales the percentage weight records in +\code{rsp_rescale} rescales the percentage weight records in a supplied (re)SPECIATE profile data set. This can be by profile or species -subsets, and \code{sp_rescale_profile} and \code{sp_rescale_species} provide +subsets, and \code{rsp_rescale_profile} and \code{rsp_rescale_species} provide short-cuts to these options. 
} \note{ diff --git a/man/sp.reshape.Rd b/man/rsp.reshape.Rd similarity index 62% rename from man/sp.reshape.Rd rename to man/rsp.reshape.Rd index 01f472f..831dc35 100644 --- a/man/sp.reshape.Rd +++ b/man/rsp.reshape.Rd @@ -1,36 +1,36 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.reshape.R -\name{sp.reshape} -\alias{sp.reshape} -\alias{sp_dcast} -\alias{sp_dcast_profile} -\alias{sp_dcast_species} -\alias{sp_melt_wide} +% Please edit documentation in R/rsp.reshape.R +\name{rsp.reshape} +\alias{rsp.reshape} +\alias{rsp_dcast} +\alias{rsp_dcast_profile} +\alias{rsp_dcast_species} +\alias{rsp_melt_wide} \title{(re)SPECIATE profile reshaping functions} \usage{ -sp_dcast(x, widen = "species") +rsp_dcast(rsp, widen = "species") -sp_dcast_profile(x, widen = "profile") +rsp_dcast_profile(rsp, widen = "profile") -sp_dcast_species(x, widen = "species") +rsp_dcast_species(rsp = rsp, widen = "species") -sp_melt_wide(x, pad = TRUE, drop.nas = TRUE) +rsp_melt_wide(rsp, pad = TRUE, drop.nas = TRUE) } \arguments{ -\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) -profiles in standard long form or widened form for -\code{\link{sp_dcast}} and \code{\link{sp_melt_wide}}, respectively.} +\item{rsp}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) +profiles in standard long form or widened form using +\code{\link{rsp_dcast}} and \code{\link{rsp_melt_wide}}, respectively.} -\item{widen}{character, when widening \code{x} with -\code{\link{sp_dcast}}, the data type to \code{dcast}, +\item{widen}{character, when widening \code{rsp} with +\code{\link{rsp_dcast}}, the data type to \code{dcast}, currently \code{'species'} (default) or \code{'profile'}. See Note.} \item{pad}{logical or character, when \code{melt}ing a previously widened data set, should output be re-populated with species and/or profile meta-data, discarded when widening. This is currently handled by -\code{\link{sp_pad}}. 
The default \code{TRUE} applies standard settings, +\code{\link{rsp_pad}}. The default \code{TRUE} applies standard settings, so does not include profile sources reference meta-data. (See -\code{\link{sp_pad}} for other options).} +\code{\link{rsp_pad}} for other options).} \item{drop.nas}{logical, when \code{melt}ing a previously widened data set, should output be stripped of any rows containing empty @@ -41,26 +41,26 @@ profiles. It is, however, sometimes useful to check first, e.g. when building profiles yourself.} } \value{ -\code{sp_dcast} returns the wide form of the supplied -\code{respeciate} profile. \code{sp_melt_wide} +\code{rsp_dcast} returns the wide form of the supplied +\code{respeciate} profile. \code{rsp_melt_wide} returns the (standard) long form of a previously widened profile. } \description{ Functions for reshaping (re)SPECIATE profiles -\code{sp_dcast} and \code{sp_melt_wide} reshape supplied -(re)SPECIATE profile(s). \code{sp_dcast} converts these from their supplied +\code{rsp_dcast} and \code{rsp_melt_wide} reshape supplied +(re)SPECIATE profile(s). \code{rsp_dcast} converts these from their supplied long form to a widened form, \code{dcast}ing the data set by either species or profiles depending on the \code{widen} setting applied. -\code{sp_dcast_profile} and \code{sp_dcast_species} are wrappers for these -options. \code{sp_melt_wide} attempts to return a previously widened data +\code{rsp_dcast_profile} and \code{rsp_dcast_species} are wrappers for these +options. \code{rsp_melt_wide} attempts to return a previously widened data set to the original long form. } \note{ Conventional long-to-wide reshaping of data, or \code{dcast}ing, can be slow and memory inefficient. So, \code{respeciate} uses the \code{\link[data.table:dcast]{data.table::dcast}} -method. The \code{sp_dcast_species} method, +method. 
The \code{rsp_dcast_species} method, applied using \code{widen='species'}, is effectively: \code{dcast(..., PROFILE_CODE+PROFILE_NAME~SPECIES_NAME, value.var="WEIGHT_PERCENT")} diff --git a/man/rsp.x.Rd b/man/rsp.x.Rd new file mode 100644 index 0000000..3e0119d --- /dev/null +++ b/man/rsp.x.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rsp.x.R +\name{rsp.x} +\alias{rsp.x} +\alias{rsp_x_copy} +\alias{rsp_x} +\alias{rsp_x_nalkane} +\alias{rsp_x_btex} +\title{rsp_x_ functions for grouping and subsetting (re)SPECIATE profiles} +\usage{ +rsp_x_copy(rsp, ref = NULL, by = "species_id") + +rsp_x_nalkane(rsp) + +rsp_x_btex(rsp) +} +\arguments{ +\item{rsp}{a \code{respeciate} object, a \code{data.frame} of (re)SPECIATE +profiles.} + +\item{ref}{(\code{rsp_x_copy} only) a second \code{respeciate} object, to +be used as reference when subsetting (or conditioning) \code{rsp}.} + +\item{by}{(\code{rsp_x_copy} only) character, the name of the column +in \code{ref} to copy when subsetting (or conditioning) \code{rsp}.} +} +\value{ +\code{rsp_x_copy} outputs can be modified but, by default, it +identifies all species in the supplied reference data set. + +\code{rsp_x_nalkane} identifies (straight chain) C1 to C40 n-alkanes. + +\code{rsp_x_btex} identifies the BTEX group of aromatic hydrocarbons +(benzene, toluene, ethyl benzene, and M-, O- and P-xylene). +} +\description{ +\code{rsp_x_} functions generate a vector of assignment +terms and can be used to subset or condition a supplied (re)SPECIATE +\code{data.frame}. + +Most commonly, the \code{rsp_x_} functions accept a single input, a +(re)SPECIATE \code{data.frame} and return a logical vector of +length \code{nrow(rsp)}, identifying species of interest as +\code{TRUE}. So, for example, they can be used when +\code{\link{subset}}ting in the form: + +\code{subset(rsp, rsp_x_nalkane(rsp))} + +... to extract just n-alkane records from a supplied \code{respeciate} +object \code{rsp}.
+ +However, some accept additional arguments. For example, \code{rsp_x_copy} +also accepts a reference data set, \code{ref}, and a column identifier, +\code{by}, and tests \code{rsp$by \%in\% unique(ref$by)}. +} diff --git a/man/sp.Rd b/man/sp.Rd deleted file mode 100644 index 135b82c..0000000 --- a/man/sp.Rd +++ /dev/null @@ -1,100 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.R -\name{sp} -\alias{sp} -\alias{sp_profile} -\alias{sp_build_rsp_x} -\title{sp_ functions} -\usage{ -sp_profile(code, ..., include.refs = FALSE) - -sp_build_rsp_x( - x, - profile_code, - profile_name, - species_name, - species_id, - value, - ... -) -} -\arguments{ -\item{code}{character, numeric or data.frame, the SPECIATE code -of the required profile (EPA SPECIATE identifier PROFILE_CODE). This is -typically one or concatenated character or numeric entries, but can also -be a \code{respeciate} object or similar \code{data.frame} containing -the \code{code}s as a named \code{PROFILE_NAME} column.} - -\item{...}{additional arguments, ignored except by \code{sp_profile} which -treats these as additional sources for \code{code}.} - -\item{include.refs}{logical, (for \code{sp_profile} only) include profile -reference information when getting the requested profile(s) from the -archive, default \code{FALSE}.} - -\item{x}{(for \code{sp_build}s only) A \code{data.frame} or similar (i.e. -something that can be converted to a \code{data.frame} using -\code{as.data.frame}) to be converted into a \code{respeciate} object for -comparison with SPECIATE profiles.} - -\item{profile_name, profile_code}{(for \code{sp_build}s only; -\code{character}) The name of the column in \code{x} containing -profile name and code, respectively. 
If not already named according -to SPECIATE conventions, at least one of these will need to be assigned.} - -\item{species_name, species_id}{(for \code{sp_build}s only; -\code{character}) The name of the column in \code{x} containing -species name and id, respectively. If not already named according -to SPECIATE conventions, at least one of these will need to be assigned.} - -\item{value}{(for \code{sp_build}s only; \code{character}) The name -of the column in \code{x} containing measurement values. If not already -named according to SPECIATE conventions, this will need to be assigned.} -} -\value{ -\code{sp_profile} returns a object of -\code{respeciate} class, a \code{data.frame} containing a -(re)SPECIATE profile. - -\code{sp_build}s attempt to build and return a (re)SPECIATE-like profile -that can be compared with with data in re(SPECIATE). -} -\description{ -sp function to get profiles from the R (re)SPECIATE archive - -\code{\link{sp_profile}} extracts a -SPECIATE profile from the local (re)SPECIATE archive. -} -\note{ -With \code{sp_profile}: - -The option \code{include.refs} adds profile source reference -information to the returned \code{respeciate} data set. The default option -is to not include these because some profiles have several associated -references and including these replicates records, once per reference. -\code{respeciate} code is written to handle this but if you are developing -own methods or code and include references in any profile build you may be -biasing some analyses in favor of those multiple-reference profile unless -you check and account such cases. - -With \code{sp_build}s: - -It is particularly IMPORTANT that you use EPA SPECIATE conventions when -assign species information if you want to compare your data with SPECIATE -profiles. 
Currently, working on option to improve on this (and very happy -to discuss if anyone has ideas), but current best suggestion is: (1) -identify the SPECIATE species code for all the species in your data set, -and (2) assign these as \code{species_id} when \code{sp_build}ing. The -function will then associate the \code{species_name}. -} -\examples{ -\dontrun{ -x <- sp_profile(c(8833, 8850)) -plot(x)} -} -\references{ -Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., -Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. -The development and uses of EPA SPECIATE database. -Atmospheric Pollution Research, 1(4), pp.196-206. -} diff --git a/man/sp.info.Rd b/man/sp.info.Rd deleted file mode 100644 index 8126699..0000000 --- a/man/sp.info.Rd +++ /dev/null @@ -1,63 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.info.R -\name{sp.info} -\alias{sp.info} -\alias{sp_info} -\alias{sp_profile_info} -\alias{sp_species_info} -\alias{sp_find_profile} -\alias{sp_find_species} -\title{re(SPECIATE) information} -\usage{ -sp_info() - -sp_profile_info(..., by = "keywords", partial = TRUE) - -sp_find_profile(...) - -sp_species_info(..., by = "species_name", partial = TRUE) - -sp_find_species(...) -} -\arguments{ -\item{...}{character(s), any search term(s) to use when searching -the local (re)SPECIATE archive for relevant records using -\code{sp_profile_info} or \code{sp_species_info}.} - -\item{by}{character, the section of the archive to -search, by default \code{'keywords'} for \code{sp_profile_info} and -\code{'species_names'} for \code{sp_species_info}.} - -\item{partial}{logical, if \code{TRUE} (default) -\code{sp_profile_info} or \code{sp_profile_info} use partial matching.} -} -\value{ -\code{sp_info} provides a brief version information report on the -currently installed (re)SPECIATE archive. - -\code{sp_profile_info} returns a \code{data.frame} of -profile information, as a \code{respeciate} object. 
-\code{sp_species_info} returns a \code{data.frame} of -species information as a \code{respeciate} object. -} -\description{ -Functions that provide (re)SPECIATE -source information. -\code{sp_info} generates a brief version report for the currently installed -(re)SPECIATE data sets. -\code{sp_profile_info} searches the currently installed (re)SPECIATE -data sets for profile records. -\code{sp_species_info} searches the currently installed (re)SPECIATE -data sets for species records. -} -\examples{ -\dontrun{ -profile <- "Ethanol" -pr <- sp_find_profile(profile) -pr - -species <- "Ethanol" -sp <- sp_find_species(species) -sp} - -} diff --git a/man/sp.pad.Rd b/man/sp.pad.Rd deleted file mode 100644 index ecdbb4e..0000000 --- a/man/sp.pad.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sp.pad.R -\name{sp.pad} -\alias{sp.pad} -\alias{sp_pad} -\title{(re)SPECIATE profile padding functions} -\usage{ -sp_pad(x, pad = "standard", drop.nas = TRUE) -} -\arguments{ -\item{x}{A \code{respeciate} object, a \code{data.frame} of re(SPECIATE) -profiles.} - -\item{pad}{character, type of meta data padding, current options -\code{'profile'}, \code{'species'}, \code{'weight'}, \code{reference}, -\code{'standard'} (default; all but \code{'reference'}), and \code{'all'} -('all').} - -\item{drop.nas}{logical, discard any rows where \code{WEIGHT_PERCENT} is -\code{NA}, default \code{TRUE}.} -} -\value{ -\code{sp_pad} returns \code{x}, with requested additional profile -and species meta-data added as additional \code{data.frame} columns. -See Note. -} -\description{ -Functions for padding \code{respeciate} objects. - -\code{sp_pad} pads a supplied (re)SPECIATE profile data set -with profile and species meta-data. -} -\note{ -Some data handling can remove (re)SPECIATE meta-data, -and \code{sp_pad}s provide a quick rebuild/repair. 
For example, -\code{\link{sp_dcast}}ing to a (by-species or by-profile) widened -form strips some meta-data, and padding is used as part of the -\code{\link{sp_melt_wide}} and padding is used to re-add this meta-data -when returning the data set to its standard long form. -} diff --git a/man/spec.Rd b/man/spec.Rd deleted file mode 100644 index a5069a3..0000000 --- a/man/spec.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/speciate.R -\name{spec} -\alias{spec} -\title{Access to the SPECIATE 5.1 US/EPA Tool} -\usage{ -spec(code) -} -\arguments{ -\item{code}{Character, PROFILE CODE required by EPA/Speciate} -} -\value{ -a data.frame with full information for the desired code (PROFILE_CODE) -} -\description{ -\code{\link{spec}} Return a speciate data.frame -} -\examples{ -\dontrun{ -code <- "8855" -x <- spec(code) -} -} -\references{ -Simon, H., Beck, L., Bhave, P.V., Divita, F., Hsu, Y., Luecken, D., -Mobley, J.D., Pouliot, G.A., Reff, A., Sarwar, G. and Strum, M., 2010. -The development and uses of EPA SPECIATE database. -Atmospheric Pollution Research, 1(4), pp.196-206. -} diff --git a/man/spq.Rd b/man/spq.Rd deleted file mode 100644 index 61a4a3c..0000000 --- a/man/spq.Rd +++ /dev/null @@ -1,48 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spq.R -\name{spq} -\alias{spq} -\alias{spq_gas} -\alias{spq_other} -\alias{spq_pm} -\alias{spq_pm.ae6} -\alias{spq_pm.ae8} -\alias{spq_pm.cr1} -\alias{spq_pm.simplified} -\title{spq_ quick access to common re(SPECIATE) sub-samples} -\usage{ -spq_gas() - -spq_other() - -spq_pm() - -spq_pm.ae6() - -spq_pm.ae8() - -spq_pm.cr1() - -spq_pm.simplified() -} -\value{ -\code{spq_} functions typically return a \code{respeciate} -\code{data.frame} of the requested profiles. - -For example: - -\code{sqr_gas} returns all gaseous profiles (\code{PROFILE_TYPE == 'GAS'}). 
- -\code{sqr_pm} returns all particulate matter (PM) profiles not classified -as a special PM type (\code{PROFILE_TYPE == 'PM'}). - -The special PM types are subsets profiles intended for special -applications, and these include \code{sqr_pm.ae6} (type \code{PM-AE6}), -\code{sqr_pm.ae8} (type \code{PM-AE8}), \code{sqr_pm.cr1} (type -\code{PM-CR1}), \code{sqr_pm.simplified} (type \code{PM-Simplified}) -and \code{sqr_other} (\code{PROFILE_TYPE == 'OTHER'}). -} -\description{ -\code{spq_} functions are quick access wrappers to commonly -requested re(SPECIATE) sub-samples. -} diff --git a/man/spx.Rd b/man/spx.Rd deleted file mode 100644 index f520964..0000000 --- a/man/spx.Rd +++ /dev/null @@ -1,55 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spx.R -\name{spx} -\alias{spx} -\alias{spx_copy} -\alias{spx_} -\alias{spx_n_alkane} -\alias{spx_btex} -\title{spx_ functions for grouping and subsetting} -\usage{ -spx_copy(x, ref = NULL, by = "species_id") - -spx_n_alkane(x) - -spx_btex(x) -} -\arguments{ -\item{x}{a \code{respeciate} object, a \code{data.frame} of re(SPECIATE) -profiles.} - -\item{ref}{(\code{spx_copy} only) a second \code{respeciate} object, to -be used as reference when testing \code{x}.} - -\item{by}{(\code{spx_copy} only) character, the name of the column -in \code{ref} to copy when testing \code{x}.} -} -\value{ -\code{spx_copy} outputs can be modified but, by default, it -identifies all species in the supplied reference data set. - -\code{spx_n_alkane} identifies C1 to C40 n-alkanes. - -\code{spx_btex} identifies the BTEX group of aromatic hydrocarbons -(benzene, toluene, ethyl benzene, and M-, O- and P-xylene). -} -\description{ -\code{spx_} functions generate a vector of assignment -terms and can be used to subset or condition a supplied re(SPECIATE) -\code{data.frame}. 
- -Most commonly, the \code{spx_} functions accept a single input, a -re(SPECIATE) \code{data.frame} and return a logical vector of -length \code{nrow(x)}, identifying species of interest as -\code{TRUE}. So, for example, they can be used when -\code{\link{subset}}ting in the form: - -\code{subset(x, spx_n_alkane(x))} - -... to extract just n-alkane records from a \code{respeciate} object -\code{x}. - -However, some accept additional arguments. For example, \code{spx_copy} -also accepts a reference data set, \code{ref}, and a column identifier, -\code{by}, and tests \code{x$by \%in\% unique(ref$by)}. -}