diff --git a/R/data-documentation.R b/R/data-documentation.R index 8091b01..656aa7a 100644 --- a/R/data-documentation.R +++ b/R/data-documentation.R @@ -48,15 +48,17 @@ #' dat <- #' data.frame(party = as.character(docvars(data_corpus_dailnoconf1991, "party")), #' govt = coef(pred)[, "Govt"], -#' position = as.character(docvars(data_corpus_dailnoconf1991, "position")), -#' stringsAsFactors = FALSE) +#' position = as.character(docvars(data_corpus_dailnoconf1991, "position"))) #' bymedian <- with(dat, reorder(paste(party, position), govt, median)) -#' par(mar = c(5, 6, 4, 2)+.1) +#' oldpar <- par(no.readonly = TRUE) +#' par(mar = c(5, 6, 4, 2) + .1) #' boxplot(govt ~ bymedian, data = dat, #' horizontal = TRUE, las = 1, -#' xlab = "Degree of support for government") +#' xlab = "Degree of support for government", +#' ylab = "") #' abline(h = 7.5, col = "red", lty = "dashed") #' text(c(0.9, 0.9), c(8.5, 6.5), c("Goverment", "Opposition")) +#' par(oldpar) #' } "data_corpus_dailnoconf1991" diff --git a/R/textmodel-methods.R b/R/textmodel-methods.R index 1a36108..420510e 100644 --- a/R/textmodel-methods.R +++ b/R/textmodel-methods.R @@ -27,8 +27,11 @@ print.summary.textmodel <- function(x, digits = max(3L, getOption("digits") - 3L } #' Assign the summary.textmodel class to a list +#' +#' Assigns the class `summary.textmodel` to a list #' @param x a named list #' @keywords internal +#' @returns an object of class `summary.textmodel` #' @export as.summary.textmodel <- function(x) { class(x) <- c("summary.textmodel", "list") @@ -61,7 +64,7 @@ as.summary.textmodel <- function(x) { # } #' Print methods for textmodel features estimates - +#' #' This is a helper function used in `print.summary.textmodel`. 
#' @param x a coefficients_textmodel object #' @param digits minimal number of *significant digits*, see @@ -83,12 +86,13 @@ print.coefficients_textmodel <- function(x, digits = max(3L, getOption("digits") } #' Coerce various objects to coefficients_textmodel - -#' This is a helper function used in `summary.textmodel_*`. +#' +#' Helper functions used in `summary.textmodel_*()`. #' @param x an object to be coerced #' @importFrom stats coefficients #' @importFrom stats coef #' @keywords internal +#' @returns an object with the class tag of `coefficients_textmodel` #' @export as.coefficients_textmodel <- function(x) { UseMethod('as.coefficients_textmodel') @@ -138,6 +142,7 @@ print.statistics_textmodel <- function(x, digits = max(3L, getOption("digits") - #' This is a helper function used in `summary.textmodel_*`. #' @param x an object to be coerced #' @keywords internal textmodel +#' @returns an object of class `statistics_textmodel` #' @export as.statistics_textmodel <- function(x) { UseMethod("as.statistics_textmodel") diff --git a/R/textmodel_affinity.R b/R/textmodel_affinity.R index 6b06e7f..f949f68 100644 --- a/R/textmodel_affinity.R +++ b/R/textmodel_affinity.R @@ -1,6 +1,6 @@ #' Class affinity maximum likelihood text scaling model #' -#' `textmodel_affinity` implements the maximum likelihood supervised text +#' `textmodel_affinity()` implements the maximum likelihood supervised text #' scaling method described in Perry and Benoit (2017). #' @param x the [dfm] or [bootstrap_dfm] object on which the model #' will be fit. Does not need to contain only the training documents, since @@ -14,6 +14,15 @@ #' defaults to 0.5 #' @param verbose logical; if `TRUE` print diagnostic information during #' fitting. 
+#' @returns A `textmodel_affinity` class list object, with elements: +#' * `smooth` a numeric vector of length two for the smoothing parameters `smooth` +#' and `ref_smooth` +#' * `x` the input model matrix `x` +#' * `y` the vector of class training labels `y` +#' * `p` a feature \eqn{\times} class sparse matrix of estimated class affinities +#' * `support` logical vector indicating whether a feature was included in computing +#' class affinities +#' * `call` the model call #' @author Patrick Perry and Kenneth Benoit #' @references Perry, P.O. & Benoit, K.R. (2017). Scaling Text with #' the Class Affinity Model. @@ -34,7 +43,7 @@ #' @importFrom stats sd predict #' @importFrom quanteda dfm_group as.dfm #' @seealso [predict.textmodel_affinity()] for methods of applying a -#' fitted [textmodel_affinity] model object to predict quantities from +#' fitted [textmodel_affinity()] model object to predict quantities from #' (other) documents. textmodel_affinity <- function(x, y, exclude = NULL, smooth = 0.5, ref_smooth = 0.5, @@ -46,7 +55,7 @@ textmodel_affinity <- function(x, y, exclude = NULL, textmodel_affinity.default <- function(x, y, exclude = NULL, smooth = 0.5, ref_smooth = 0.5, verbose = quanteda_options("verbose")) { - stop(friendly_class_undefined_message(class(x), "textmodel_affinity")) + stop(check_class(class(x), "textmodel_affinity")) } @@ -90,7 +99,7 @@ textmodel_affinity.dfm <- function(x, y, exclude = NULL, y = y, p = p, support = fitted$support, - method = "affinity", + # method = "affinity", call = match.call() ) class(result) <- "textmodel_affinity" @@ -139,7 +148,12 @@ textmodel_affinity.dfm_bootstrap <- function(x, y, exclude = NULL, #' @param smooth a misnamed smoothing parameter, either a scalar or a vector #' equal in length to the number of documents #' @author Patrick Perry -#' @return a list with stuff +#' @returns a list containing: +#' * `coefficients` point estimates of theta +#' * `se` (likelihood) standard error of theta +#' * `cov` covariance 
matrix +#' * `smooth` values of the smoothing parameter +#' * `support` logical indicating if the feature was included #' @examples #' p <- matrix(c(c(5/6, 0, 1/6), c(0, 4/5, 1/5)), nrow = 3, #' dimnames = list(c("A", "B", "C"), NULL)) @@ -449,8 +463,21 @@ print.textmodel_affinity <- function(x, ...) { #' @param level probability level for confidence interval width #' @param newdata dfm on which prediction should be made #' @param ... unused -#' @return `predict()` returns a list of predicted affinity textmodel -#' quantities. +#' @returns `predict()` returns a list of predicted affinity textmodel +#' quantities, containing: +#' * `coefficients` a numeric matrix of affinity estimates (coefficients) for +#' each class (columns) for each document (rows) +#' * `se` a numeric matrix of likelihood standard errors for affinity coefficients +#' each class (columns) for each document (rows) +#' * `cov` an array of covariance matrices for each affinity class, one per document +#' * `smooth` a numeric vector of length two for the smoothing parameters `smooth` +#' and `ref_smooth` from [textmodel_affinity()] +#' * `newdata` a [dfm][quanteda::dfm] on which prediction has been made +#' * `train` a logical vector indicating which documents were used in training the model +#' * `level` the confidence level for computing standard errors +#' * `p` the `p` return from `textmodel_affinity` +#' * `support` logical vector indicating whether a feature was included in computing +#' class affinities #' @importFrom methods new #' @importFrom stats predict #' @method predict textmodel_affinity @@ -497,7 +524,7 @@ print.predict.textmodel_affinity <- function(x, ...) { #' @rdname predict.textmodel_affinity #' @method coef predict.textmodel_affinity -#' @return `coef()` returns a document \eqn{\times} class matrix of class +#' @returns `coef()` returns a document \eqn{\times} class matrix of class #' affinities for each document. 
#' @export coef.predict.textmodel_affinity <- function(object, ...) { @@ -511,7 +538,7 @@ coefficients.predict.textmodel_affinity <- function(object, ...) { } #' @rdname predict.textmodel_affinity -#' @return +#' @returns #' `residuals()` returns a document-by-feature matrix of residuals. #' `resid()` is an alias. #' @method residuals predict.textmodel_affinity @@ -540,7 +567,7 @@ resid.predict.textmodel_affinity <- function(object, ...) { #' @rdname predict.textmodel_affinity #' @method rstandard predict.textmodel_affinity -#' @return `rstandard()` is a shortcut to return the pearson residuals. +#' @returns `rstandard()` is a shortcut to return the Pearson residuals. #' @importFrom stats rstandard sd #' @export rstandard.predict.textmodel_affinity <- function(model, ...) { @@ -552,10 +579,10 @@ rstandard.predict.textmodel_affinity <- function(model, ...) { #' Compute feature influence from a predicted textmodel_affinity object #' -#' Computes the influence of features on scaled [textmodel_affinity] +#' Computes the influence of features on scaled [textmodel_affinity()] #' applications. -#' @param model a predicted -#' [textmodel_affinity][predict.textmodel_affinity] object +#' @param model a predicted [textmodel_affinity()][predict.textmodel_affinity] +#' object #' @param subset whether to use all data or a subset (for instance, exclude the #' training set) #' @param ... unused @@ -563,9 +590,24 @@ rstandard.predict.textmodel_affinity <- function(model, ...) 
{ #' @keywords textmodel internal #' @importFrom stats influence #' @method influence predict.textmodel_affinity +#' @returns a named list classed as [influence.predict.textmodel_affinity] that +#' contains +#' * `norm` a document by feature class sparse matrix of normalised influence +#' measures +#' * `count` a vector of counts of each non-zero feature in the input matrix +#' * `rate` the normalised feature count for each non-zero feature in the input +#' matrix +#' * `mode` an integer vector of 1 or 2 indicating the class which the feature +#' is influencing, for each non-zero feature +#' * `levels` a character vector of the affinity class labels +#' * `subset` a logical vector indicating whether the document was included in +#' the computation of influence; `FALSE` for documents assigned a class label +#' in training the model +#' * `support` logical vector for each feature matching the same return from +#' [predict.textmodel_affinity] #' @examples -#' tmot <- textmodel_affinity(quanteda::data_dfm_lbgexample, y = c("L", NA, NA, NA, "R", NA)) -#' pred <- predict(tmot) +#' tmod <- textmodel_affinity(quanteda::data_dfm_lbgexample, y = c("L", NA, NA, NA, "R", NA)) +#' pred <- predict(tmod) #' influence(pred) #' @export influence.predict.textmodel_affinity <- function(model, subset = !train, ...) { @@ -665,6 +707,24 @@ print.influence.predict.textmodel_affinity <- function(x, n = 30, ...) 
{ #' @rdname textmodel_affinity-internal #' @method summary influence.predict.textmodel_affinity +#' @returns `summary.influence.predict.textmodel_affinity()` returns a list +#' classed as `summary.influence.predict.textmodel_affinity` that includes: +#' +#' * `word` the feature name +#' * `count` the total counts of each feature for which influence was computed +#' * `mean`, `median`, `sd`, `max` mean, median, standard deviation, and maximum +#' values of influence for each feature, computed across classes +#' * `direction` an integer vector of 1 or 2 indicating the class which the feature +#' is influencing +#' * `rate` a document by feature class sparse matrix of normalised influence +#' measures +#' * `count` a vector of counts of each non-zero feature in the input matrix +#' * `rate` the median of `rate` from [influence.predict.textmodel_affinity()] +#' * `support` logical vector for each feature matching the same return from +#' [predict.textmodel_affinity()] +#' +#' the mean, the standard deviation, the direction of the influence, the rate, +#' and the support #' @importFrom stats median #' @export summary.influence.predict.textmodel_affinity <- function(object, ...) { @@ -731,9 +791,12 @@ summary.influence.predict.textmodel_affinity <- function(object, ...) 
{ levels <- seq_along(labels) max_dir <- factor(max_dir, levels, labels) - result <- list(word = words, count = count_val, - mean = mean_val, median = med_val, - sd = sd_val, max = max_val, + result <- list(word = words, + count = count_val, + mean = mean_val, + median = med_val, + sd = sd_val, + max = max_val, direction = max_dir, rate = med_rate, support = object$support) @@ -789,6 +852,3 @@ interleave <- function(v1, v2) { ord2 <- 2 * (seq_along(v2)) c(v1, v2)[order(c(ord1, ord2))] } - - - diff --git a/R/textmodel_ca.R b/R/textmodel_ca.R index be62de5..6f9d10d 100644 --- a/R/textmodel_ca.R +++ b/R/textmodel_ca.R @@ -45,7 +45,7 @@ textmodel_ca <- function(x, smooth = 0, nd = NA, sparse = FALSE, #' @export textmodel_ca.default <- function(x, smooth = 0, nd = NA, sparse = FALSE, residual_floor = 0.1) { - stop(friendly_class_undefined_message(class(x), "textmodel_ca")) + stop(check_class(class(x), "textmodel_ca")) } #' @export @@ -151,6 +151,14 @@ textmodel_ca.dfm <- function(x, smooth = 0, nd = NA, sparse = FALSE, #' extracted #' @param ... unused #' @keywords textmodel internal +#' @returns a list containing numeric vectors of feature and document +#' coordinates. Includes `NA` vectors of standard errors for consistency with +#' other models' coefficient outputs, and for the possibility of having these +#' computed in the future. +#' * `coef_feature` column coordinates of the features +#' * `coef_feature_se` feature length vector of `NA` values +#' * `coef_document` row coordinates of the documents +#' * `coef_document_se` document length vector of `NA` values #' @export coef.textmodel_ca <- function(object, doc_dim = 1, feat_dim = 1, ...) { list(coef_feature = object$colcoord[, feat_dim], diff --git a/R/textmodel_lr.R b/R/textmodel_lr.R index 3fd53d5..79076d2 100644 --- a/R/textmodel_lr.R +++ b/R/textmodel_lr.R @@ -12,6 +12,14 @@ #' in \code{train}. (These will be converted to factors if not already #' factors.) #' @param ... 
additional arguments passed to [`cv.glmnet()`][glmnet::cv.glmnet()] +#' @returns an object of class `textmodel_lr`, a list containing: +#' * `x`, `y` the input model matrix and input training class labels +#' * `algorithm` character; the type and family of logistic regression model used in calling +#' [`cv.glmnet()`][glmnet::cv.glmnet()] +#' * `type` the type of associated with `algorithm` +#' * `classnames` the levels of training classes in `y` +#' * `lrfitted` the fitted model object from [`cv.glmnet()`][glmnet::cv.glmnet()] +#' * `call` the model call #' @seealso [`cv.glmnet()`][glmnet::cv.glmnet()], [predict.textmodel_lr()], #' [coef.textmodel_lr()] #' @references @@ -49,7 +57,7 @@ textmodel_lr <- function(x, y, ...) { #' @export textmodel_lr.default <- function(x, y, ...) { - stop(quanteda:::friendly_class_undefined_message(class(x), "textmodel_lr")) + stop(check_class(class(x), "textmodel_lr")) } #' @export @@ -219,6 +227,9 @@ coefficients.textmodel_lr <- function(object, ...) { #' @param n how many coefficients to print before truncating #' @param ... additional arguments not used #' @keywords textmodel internal +#' @returns a `summary.textmodel` classed list containing elements from the +#' call to `textmodel_lr()`, including the call, statistics for lambda, and +#' the estimated feature scores #' @method summary textmodel_lr #' @export summary.textmodel_lr <- function(object, n = 30, ...) 
{ diff --git a/R/textmodel_lsa.R b/R/textmodel_lsa.R index 830b80e..019c120 100644 --- a/R/textmodel_lsa.R +++ b/R/textmodel_lsa.R @@ -5,6 +5,12 @@ #' @param x the [dfm] on which the model will be fit #' @param nd the number of dimensions to be included in output #' @param margin margin to be smoothed by the SVD +#' @returns a `textmodel_lsa` class object, a list containing: +#' * `sk` a numeric vector containing the d values from the SVD +#' * `docs` document coordinates from the SVD (u) +#' * `features` feature coordinates from the SVD (v) +#' * `matrix_low_rank` the multiplication of udv' +#' * `data` the input data as a CSparseMatrix from the \pkg{Matrix} package #' @author Haiyan Wang and Kohei Watanabe #' @details [svds][RSpectra::svds] in the \pkg{RSpectra} package is applied to #' enable the fast computation of the SVD. diff --git a/R/textmodel_nb.R b/R/textmodel_nb.R index cadb380..d1c3561 100644 --- a/R/textmodel_nb.R +++ b/R/textmodel_nb.R @@ -106,7 +106,7 @@ textmodel_nb <- function(x, y, smooth = 1, textmodel_nb.default <- function(x, y, smooth = 1, prior = c("uniform", "docfreq", "termfreq"), distribution = c("multinomial", "Bernoulli")) { - stop(friendly_class_undefined_message(class(x), "textmodel_nb")) + stop(check_class(class(x), "textmodel_nb")) } #' @export @@ -200,7 +200,7 @@ textmodel_nb.dfm <- function(x, y, smooth = 1, predict.textmodel_nb <- function(object, newdata = NULL, type = c("class", "probability", "logposterior"), force = FALSE, ...) { - unused_dots(...) + check_dots(...) type <- match.arg(type) if ("Pc" %in% names(object)) { names(object)[which(names(object) == "Pc")] <- "priors" @@ -278,6 +278,8 @@ print.textmodel_nb <- function(x, ...) { #' @param object output from [textmodel_nb()] #' @param n how many coefficients to print before truncating #' @param ... 
additional arguments not used +#' @returns a `summary.textmodel` classed list containing the call, the class +#' priors, and the estimated feature scores #' @keywords textmodel internal #' @method summary textmodel_nb #' @export diff --git a/R/textmodel_svm.R b/R/textmodel_svm.R index 2f873eb..8897e80 100644 --- a/R/textmodel_svm.R +++ b/R/textmodel_svm.R @@ -16,6 +16,15 @@ #' [LiblineaR::LiblineaR()]; default is `1` for L2-regularized L2-loss support #' vector classification (dual) #' @param ... additional arguments passed to [LiblineaR::LiblineaR()] +#' @returns an object of class `textmodel_svm`, a list containing: +#' * `x`, `y`, `weights`, `type`: argument values from the call parameters +#' * `algorithm` character label of the algorithm used in the call to +#' [LiblineaR::LiblineaR()] +#' * `classnames` levels of `y` +#' * `bias` the value of `Bias` returned from [LiblineaR::LiblineaR()] +#' * `svmlinfitted` the fitted model object passed from the call to +#' [LiblineaR::LiblineaR()] +#' * `call` the model call #' @references #' R. E. Fan, K. W. Chang, C. J. Hsieh, X. R. Wang, and C. J. Lin. (2008) #' LIBLINEAR: A Library for Large Linear Classification. @@ -42,7 +51,7 @@ textmodel_svm <- function(x, y, weight = c("uniform", "docfreq", "termfreq"), ty #' @export textmodel_svm.default <- function(x, y, weight = c("uniform", "docfreq", "termfreq"), type = 1, ...) { - stop(friendly_class_undefined_message(class(x), "textmodel_svm")) + stop(check_class(class(x), "textmodel_svm")) } #' @importFrom LiblineaR LiblineaR @@ -115,7 +124,7 @@ textmodel_svm.dfm <- function(x, y, weight = c("uniform", "docfreq", "termfreq") predict.textmodel_svm <- function(object, newdata = NULL, type = c("class", "probability"), force = TRUE, ...) { - unused_dots(...) + check_dots(...) type <- match.arg(type) @@ -166,6 +175,8 @@ print.textmodel_svm <- function(x, ...) 
{ #' @param object output from [textmodel_svm()] #' @param n how many coefficients to print before truncating #' @param ... additional arguments not used +#' @returns a `summary.textmodel` classed list containing the call and the +#' estimated feature scores #' @keywords textmodel internal #' @method summary textmodel_svm #' @export @@ -206,6 +217,7 @@ print.predict.textmodel_svm <- function(x, ...) { #' @importFrom SparseM as.matrix.csr #' @importFrom methods new #' @method as.matrix.csr dfm +#' @returns a \pkg{SparseM} object of class [matrix.csr][SparseM::matrix.csr] #' @keywords internal as.matrix.csr.dfm <- function(x) { # convert first to column sparse format diff --git a/R/textmodel_svmlin.R b/R/textmodel_svmlin.R index 8f37704..ec5a8ac 100644 --- a/R/textmodel_svmlin.R +++ b/R/textmodel_svmlin.R @@ -62,7 +62,7 @@ textmodel_svmlin.default <- function(x, y, intercept = TRUE, # x_u = NULL, # lambda_u = 1, max_switch = 10000, # pos_frac = 0.5, cp = 1, cn = 1, scale = FALSE, center = FALSE) { - stop(friendly_class_undefined_message(class(x), "textmodel_svmlin")) + stop(check_class(class(x), "textmodel_svmlin")) } #' @export @@ -137,7 +137,7 @@ textmodel_svmlin.dfm <- function(x, y, intercept = TRUE, # x_u = NULL, predict.textmodel_svmlin <- function(object, newdata = NULL, type = c("class", "probability"), force = FALSE, ...) { - unused_dots(...) + check_dots(...) type <- match.arg(type) @@ -183,6 +183,8 @@ print.textmodel_svmlin <- function(x, ...) { #' @param object output from [textmodel_svmlin()] #' @param n how many coefficients to print before truncating #' @param ... 
additional arguments not used +#' @returns a `summary.textmodel` classed list containing the call and the +#' estimated feature scores #' @keywords textmodel internal #' @method summary textmodel_svmlin #' @importFrom utils head diff --git a/R/textmodel_wordfish.R b/R/textmodel_wordfish.R index a35d246..6d84fb7 100644 --- a/R/textmodel_wordfish.R +++ b/R/textmodel_wordfish.R @@ -121,7 +121,7 @@ textmodel_wordfish.default <- function(x, dir = c(1, 2), abs_err = FALSE, svd_sparse = TRUE, residual_floor = 0.5) { - stop(friendly_class_undefined_message(class(x), "textmodel_wordfish")) + stop(check_class(class(x), "textmodel_wordfish")) } #' @export @@ -284,6 +284,8 @@ print.textmodel_wordfish <- function(x, ...) { #' @param object a [textmodel_wordfish] object #' @param n maximum number of features to print in summary #' @param ... unused +#' @returns a `summary.textmodel` classed list containing the call, the +#' estimated document positions, and the estimated feature scores #' @export #' @method summary textmodel_wordfish #' @keywords internal textmodel diff --git a/R/textmodel_wordscores.R b/R/textmodel_wordscores.R index 3f71a37..9296136 100644 --- a/R/textmodel_wordscores.R +++ b/R/textmodel_wordscores.R @@ -63,7 +63,7 @@ textmodel_wordscores <- function(x, y, scale = c("linear", "logit"), smooth = 0) #' @export textmodel_wordscores.default <- function(x, y, scale = c("linear", "logit"), smooth = 0) { - stop(friendly_class_undefined_message(class(x), "textmodel_wordscores")) + stop(check_class(class(x), "textmodel_wordscores")) } #' @export @@ -159,7 +159,7 @@ predict.textmodel_wordscores <- function(object, force = TRUE, ...) { - unused_dots(...) + check_dots(...) 
interval <- match.arg(interval) rescaling <- match.arg(rescaling) diff --git a/R/textplot_influence.R b/R/textplot_influence.R index 0497055..9e3a332 100644 --- a/R/textplot_influence.R +++ b/R/textplot_influence.R @@ -6,6 +6,9 @@ #' fitted or predicted scaling model object to be plotted #' @param n the number of features whose influence will be plotted #' @param ... additional arguments passed to [plot()] +#' @returns Creates a base R plot of feature influences of the median influence +#' by the log10 median rate of the feature, and invisibly returns the elements +#' from the call to [plot()]. #' @seealso [textmodel_affinity()] #' @importFrom graphics plot #' @export @@ -22,7 +25,7 @@ textplot_influence <- function(x, n = 30, ...) { #' @export textplot_influence.default <- function(x, n = 30, ...) { - stop(friendly_class_undefined_message(class(x), "textplot_influence")) + stop(check_class(class(x), "textplot_influence")) } #' @export diff --git a/R/utils.R b/R/utils.R index 7a5c1f0..da06c00 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,8 @@ +check_dots <- quanteda:::check_dots +check_class <- quanteda:::check_class +message_error <- quanteda:::message_error +catm <- quanteda:::catm + #' Internal function to match a dfm features to a target set #' #' Takes a dfm and a set of features, and makes them match the features listed @@ -6,6 +11,9 @@ #' @param features character; a vector of feature names #' @param force logical; if `TRUE`, make the new dfm conform to the vector of #' features, otherwise return an error message +#' @returns a [dfm][quanteda::dfm] from the \pkg{quanteda} package containing +#' only `features` as columns, in the same order as `features`. A warning message +#' is printed if some feature names from `features` are not matched in `x`. 
#' @keywords internal dfm #' @importFrom quanteda is.dfm featnames dfm_match #' @examples @@ -27,64 +35,68 @@ force_conformance <- function(x, features, force = TRUE) { } } -#' Raise warning of unused dots -#' @param ... dots to check -#' @keywords internal -unused_dots <- function(...) { - arg <- names(list(...)) - if (length(arg) == 1) { - warning(arg[1], " argument is not used.", call. = FALSE) - } else if (length(arg) > 1) { - warning(paste0(arg, collapse = ", "), " arguments are not used.", call. = FALSE) - } -} +# Raise warning of unused dots +# @param ... dots to check +# @keywords internal +# unused_dots <- function(...) { +# arg <- names(list(...)) +# if (length(arg) == 1) { +# warning(arg[1], " argument is not used.", call. = FALSE) +# } else if (length(arg) > 1) { +# warning(paste0(arg, collapse = ", "), " arguments are not used.", call. = FALSE) +# } +# } + + +# #' Print friendly object class not defined message +# #' +# #' Checks valid methods and issues a friendlier error message in case the method is +# #' undefined for the supplied object type. +# #' @param object_class character describing the object class +# #' @param function_name character which is the function name +# #' @keywords internal +# #' @examples +# #' # as.tokens.default <- function(x, concatenator = "", ...) 
{ +# #' # stop(quanteda:::friendly_class_undefined_message(class(x), "as.tokens")) +# #' # } +# friendly_class_undefined_message <- function(object_class, function_name) { +# valid_object_types <- as.character(utils::methods(function_name)) +# valid_object_types <- stringi::stri_replace_first_fixed(valid_object_types, +# paste0(function_name, "."), "") +# valid_object_types <- valid_object_types[valid_object_types != "default"] +# paste0(function_name, "() only works on ", +# paste(valid_object_types, collapse = ", "), +# " objects.") +#} -#' Print friendly object class not defined message -#' -#' Checks valid methods and issues a friendlier error message in case the method is -#' undefined for the supplied object type. -#' @param object_class character describing the object class -#' @param function_name character which is the function name -#' @keywords internal -#' @examples -#' # as.tokens.default <- function(x, concatenator = "", ...) { -#' # stop(quanteda:::friendly_class_undefined_message(class(x), "as.tokens")) -#' # } -friendly_class_undefined_message <- function(object_class, function_name) { - valid_object_types <- as.character(utils::methods(function_name)) - valid_object_types <- stringi::stri_replace_first_fixed(valid_object_types, - paste0(function_name, "."), "") - valid_object_types <- valid_object_types[valid_object_types != "default"] - paste0(function_name, "() only works on ", - paste(valid_object_types, collapse = ", "), - " objects.") -} -#' Return an error message -#' @param key type of error message -#' @keywords internal -message_error <- function(key = NULL) { - msg <- c("dfm_empty" = "dfm must have at least one non-zero value", - "fcm_empty" = "fcm must have at least one non-zero value", - "fcm_context" = "fcm must be created with a document context", - "matrix_mismatch" = "matrix must have the same rownames and colnames", - "docnames_mismatch" = "docnames must the the same length as x", - "docvars_mismatch" = "data.frame must have the 
same number of rows as documents", - "docvars_invalid" = "document variables cannot begin with the underscore", - "docvar_nofield" = "you must supply field name(s)", - "docvar_nocolname" = "data.frame must have column names") - if (is.null(key) || !key %in% names(msg)) { - return("") - } - return(unname(msg[key])) -} + +# #' Return an error message +# #' @param key type of error message +# #' @keywords internal +# message_error <- function(key = NULL) { +# msg <- c("dfm_empty" = "dfm must have at least one non-zero value", +# "fcm_empty" = "fcm must have at least one non-zero value", +# "fcm_context" = "fcm must be created with a document context", +# "matrix_mismatch" = "matrix must have the same rownames and colnames", +# "docnames_mismatch" = "docnames must the the same length as x", +# "docvars_mismatch" = "data.frame must have the same number of rows as documents", +# "docvars_invalid" = "document variables cannot begin with the underscore", +# "docvar_nofield" = "you must supply field name(s)", +# "docvar_nocolname" = "data.frame must have column names") +# if (is.null(key) || !key %in% names(msg)) { +# return("") +# } +# return(unname(msg[key])) +# } # rdname catm # messages() with some of the same syntax as cat(): takes a sep argument and # does not append a newline by default -catm <- function(..., sep = " ", appendLF = FALSE) { - message(paste(..., sep = sep), appendLF = appendLF) -} +# catm <- function(..., sep = " ", appendLF = FALSE) { +# message(paste(..., sep = sep), appendLF = appendLF) +# } + ## make cols add up to one diff --git a/cran-comments.md b/cran-comments.md index fd7632d..e90978a 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -6,6 +6,83 @@ To remedy the problems that led to the package being archived on Mar 15, 2023, s * Fix compiler warnings appearing in devel versions * Fix URL for large dataset used in vignette +Upon resubmission to be restored from the archive, a number of new issues were raised that we also addressed: + +> 
Please add \value to .Rd files regarding exported methods and explain the +> functions results in the documentation. Please write about the structure of the +> output (class) and also what the output means. (If a function does not return a +> value, please document that too, e.g. \value{No return value, called for side +> effects} or similar) +> Missing Rd-tags in up to 20 .Rd files, e.g.: +> as.coefficients_textmodel.Rd: \value +> as.statistics_textmodel.Rd: \value +> as.summary.textmodel.Rd: \value +> coef.textmodel_ca.Rd: \value +> influence.predict.textmodel_affinity.Rd: \value +> print.coefficients_textmodel.Rd: \value +> ... + +We combed through the entire documentation set and have added or improved return value descriptions to the following functions. +* `affinity()` +* `as.coefficients_textmodel()` +* `as.matrix.csr.dfm()` +* `as.statistics_textmodel()` +* `as.summary.textmodel()` +* `coef.textmodel_ca()` +* `force_conformance()` +* `influence.predict.textmodel_affinity()` +* `summary.textmodel_lr()` +* `summary.textmodel_nb()` +* `summary.textmodel_svm()` +* `summary.textmodel_svmlin()` +* `summary.textmodel_wordfish()` +* `summary.textmodel_affinity()` +* `textmodel_affinity()` +* `textmodel_lr()` +* `textmodel_lsa()` +* `textmodel_svm()` +* `textmodel_svmlin()` +* `textmodel_wordscores()` +* `textplot_influence()` + +We did not document returns for the `print` methods, as base::print.default() does not have a documented return value (and there is no return value). + +> You have examples for unexported functions. Please either omit these examples or export these functions. +> Examples for unexported function +> data_corpus_EPcoaldebate() in: +> force_conformance.Rd +> data_corpus_irishbudget2010() in: +> friendly_class_undefined_message.Rd + +`data_corpus_EPcoaldebate` is a data object in the package, not an unexported function. It is fully documented. It is also not referenced in force_conformance.Rd so we are not sure why this point was raised. 
+ +`data_corpus_irishbudget2010` is a data object in the package, not an unexported function. It is fully documented. It is also not referenced in friendly_class_undefined_message.Rd so we are not sure why this point was raised. + + +> Some code lines in examples are commented out. +> Please never do that. Ideally find toy examples that can be regularly executed +> and checked. Lengthy examples (> 5 sec), can be wrapped in \donttest{}. +> Examples in comments in: +> friendly_class_undefined_message.Rd + +> \dontrun{} should only be used if the example really cannot be executed (e.g. +> because of missing additional software, missing API keys, ...) by the user. +> That's why wrapping examples in \dontrun{} adds the comment ("# Not run:") as a +> warning for the user. +> Does not seem necessary. +> Please unwrap the examples if they are executable in < 5 sec, or replace +> \dontrun{} with \donttest{}. + +Those functions are from quanteda and had to be included as duplicates here dating back to the partition of the main functions in quanteda.textmodels from quanteda. We've now simply redefined those as functions from quanteda, and removed the code from quanteda.textmodels. + + +> Please always make sure to reset to user's options(), working directory or par() after you changed it in examples and vignettes and demos. > -> man/data_corpus_dailnoconf1991.Rd +> e.g.: +> oldpar <- par(mfrow = c(1,2)) +> ... +> par(oldpar) + +Fixed now. 
# Checks diff --git a/inst/WORDLIST b/inst/WORDLIST index fa0db3b..eba48c8 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,7 +1,8 @@ ACL Akitaka -Benoit's CMD +CSparseMatrix +Codecov Crowdflower Deerwester Dumais @@ -22,7 +23,7 @@ LBG LIBLINEAR LSA Landauer -Lifecycle +LiblineaR MFN Maas Majka @@ -56,7 +57,7 @@ Wordscores Wordscoring arXiv austin -codecov +com csr dfm docfreq @@ -65,16 +66,17 @@ doi elasticnet fastNaiveBayes gfortran +github io lr lsa macM macOS +mskogholt naivebayes nb newdata's org -pearson quanteda se stat @@ -86,6 +88,8 @@ termfreq textmodel textmodels th +u +udv underdispersed underdispersion unsmoothed diff --git a/man/affinity.Rd b/man/affinity.Rd index f297d7a..9f38569 100644 --- a/man/affinity.Rd +++ b/man/affinity.Rd @@ -15,7 +15,14 @@ affinity(p, x, smooth = 0.5, verbose = FALSE) equal in length to the number of documents} } \value{ -a list with stuff +a list containing: +\itemize{ +\item \code{coefficients} point estimates of theta +\item \code{se} (likelihood) standard error of theta +\item \code{cov} covariance matrix +\item \code{smooth} values of the smoothing parameter +\item \code{support} logical indicating if the feature was included +} } \description{ Ken recommends you use \code{\link[=textmodel_affinity]{textmodel_affinity()}} instead. 
diff --git a/man/as.coefficients_textmodel.Rd b/man/as.coefficients_textmodel.Rd index 743b74f..3de1f3b 100644 --- a/man/as.coefficients_textmodel.Rd +++ b/man/as.coefficients_textmodel.Rd @@ -2,16 +2,17 @@ % Please edit documentation in R/textmodel-methods.R \name{as.coefficients_textmodel} \alias{as.coefficients_textmodel} -\title{Coerce various objects to coefficients_textmodel -This is a helper function used in \verb{summary.textmodel_*}.} +\title{Coerce various objects to coefficients_textmodel} \usage{ as.coefficients_textmodel(x) } \arguments{ \item{x}{an object to be coerced} } +\value{ +an object with the class tag of \code{coefficients_textmodel} +} \description{ -Coerce various objects to coefficients_textmodel -This is a helper function used in \verb{summary.textmodel_*}. +Helper functions used in \verb{summary.textmodel_*()}. } \keyword{internal} diff --git a/man/as.matrix.csr.dfm.Rd b/man/as.matrix.csr.dfm.Rd index 9e30362..17c29f9 100644 --- a/man/as.matrix.csr.dfm.Rd +++ b/man/as.matrix.csr.dfm.Rd @@ -9,6 +9,9 @@ \arguments{ \item{x}{input \link{dfm}} } +\value{ +a \pkg{SparseM} object of class \link[SparseM:SparseM.ontology]{matrix.csr} +} \description{ Utility to convert a dfm into a \link[SparseM:SparseM.ontology]{matrix.csr} from the \pkg{SparseM} package. } diff --git a/man/as.statistics_textmodel.Rd b/man/as.statistics_textmodel.Rd index 6669a24..356824a 100644 --- a/man/as.statistics_textmodel.Rd +++ b/man/as.statistics_textmodel.Rd @@ -9,6 +9,9 @@ as.statistics_textmodel(x) \arguments{ \item{x}{an object to be coerced} } +\value{ +an object of class \code{statistics_textmodel} +} \description{ This is a helper function used in \verb{summary.textmodel_*}. 
} diff --git a/man/as.summary.textmodel.Rd b/man/as.summary.textmodel.Rd index 71622ca..521d83f 100644 --- a/man/as.summary.textmodel.Rd +++ b/man/as.summary.textmodel.Rd @@ -9,7 +9,10 @@ as.summary.textmodel(x) \arguments{ \item{x}{a named list} } +\value{ +an object of class \code{summary.textmodel} +} \description{ -Assign the summary.textmodel class to a list +Assigns the class \code{summary.textmodel} to a list } \keyword{internal} diff --git a/man/coef.textmodel_ca.Rd b/man/coef.textmodel_ca.Rd index c34c1f6..6b645ff 100644 --- a/man/coef.textmodel_ca.Rd +++ b/man/coef.textmodel_ca.Rd @@ -17,6 +17,18 @@ extracted} \item{...}{unused} } +\value{ +a list containing numeric vectors of feature and document +coordinates. Includes \code{NA} vectors of standard errors for consistency with +other models' coefficient outputs, and for the possibility of having these +computed in the future. +\itemize{ +\item \code{coef_feature} column coordinates of the features +\item \code{coef_feature_se} feature length vector of \code{NA} values +\item \code{coef_document} row coordinates of the documents +\item \code{coef_document_se} document length vector of \code{NA} values +} +} \description{ \code{coef()} extract model coefficients from a fitted \code{textmodel_ca} object. \code{coefficients()} is an alias. 
diff --git a/man/data_corpus_dailnoconf1991.Rd b/man/data_corpus_dailnoconf1991.Rd index 62fd11d..8c16f03 100644 --- a/man/data_corpus_dailnoconf1991.Rd +++ b/man/data_corpus_dailnoconf1991.Rd @@ -31,15 +31,17 @@ tmod <- textmodel_affinity(data_dfm_dailnoconf1991, dat <- data.frame(party = as.character(docvars(data_corpus_dailnoconf1991, "party")), govt = coef(pred)[, "Govt"], - position = as.character(docvars(data_corpus_dailnoconf1991, "position")), - stringsAsFactors = FALSE) + position = as.character(docvars(data_corpus_dailnoconf1991, "position"))) bymedian <- with(dat, reorder(paste(party, position), govt, median)) -par(mar = c(5, 6, 4, 2)+.1) +oldpar <- par(no.readonly = TRUE) +par(mar = c(5, 6, 4, 2) + .1) boxplot(govt ~ bymedian, data = dat, horizontal = TRUE, las = 1, - xlab = "Degree of support for government") + xlab = "Degree of support for government", + ylab = "") abline(h = 7.5, col = "red", lty = "dashed") text(c(0.9, 0.9), c(8.5, 6.5), c("Goverment", "Opposition")) +par(oldpar) } } \references{ diff --git a/man/force_conformance.Rd b/man/force_conformance.Rd index bba85ed..830ec61 100644 --- a/man/force_conformance.Rd +++ b/man/force_conformance.Rd @@ -14,6 +14,11 @@ force_conformance(x, features, force = TRUE) \item{force}{logical; if \code{TRUE}, make the new dfm conform to the vector of features, otherwise return an error message} } +\value{ +a \link[quanteda:dfm]{dfm} from the \pkg{quanteda} package containing +only \code{features} as columns, in the same order as \code{features}. A warning message +is printed if some feature names from \code{features} are not matched in \code{x}. +} \description{ Takes a dfm and a set of features, and makes them match the features listed in the set. 
diff --git a/man/friendly_class_undefined_message.Rd b/man/friendly_class_undefined_message.Rd deleted file mode 100644 index f84e095..0000000 --- a/man/friendly_class_undefined_message.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{friendly_class_undefined_message} -\alias{friendly_class_undefined_message} -\title{Print friendly object class not defined message} -\usage{ -friendly_class_undefined_message(object_class, function_name) -} -\arguments{ -\item{object_class}{character describing the object class} - -\item{function_name}{character which is the function name} -} -\description{ -Checks valid methods and issues a friendlier error message in case the method is -undefined for the supplied object type. -} -\examples{ -# as.tokens.default <- function(x, concatenator = "", ...) { -# stop(quanteda:::friendly_class_undefined_message(class(x), "as.tokens")) -# } -} -\keyword{internal} diff --git a/man/influence.predict.textmodel_affinity.Rd b/man/influence.predict.textmodel_affinity.Rd index 9214ef6..26b70ec 100644 --- a/man/influence.predict.textmodel_affinity.Rd +++ b/man/influence.predict.textmodel_affinity.Rd @@ -7,21 +7,40 @@ \method{influence}{predict.textmodel_affinity}(model, subset = !train, ...) 
} \arguments{ -\item{model}{a predicted -\link[=predict.textmodel_affinity]{textmodel_affinity} object} +\item{model}{a predicted \link[=predict.textmodel_affinity]{textmodel_affinity()} +object} \item{subset}{whether to use all data or a subset (for instance, exclude the training set)} \item{...}{unused} } +\value{ +a named list classed as \link{influence.predict.textmodel_affinity} that +contains +\itemize{ +\item \code{norm} a document by feature class sparse matrix of normalised influence +measures +\item \code{count} a vector of counts of each non-zero feature in the input matrix +\item \code{rate} the normalised feature count for each non-zero feature in the input +matrix +\item \code{mode} an integer vector of 1 or 2 indicating the class which the feature +is influencing, for each non-zero feature +\item \code{levels} a character vector of the affinity class labels +\item \code{subset} a logical vector indicating whether the document was included in +the computation of influence; \code{FALSE} for documents assigned a class label +in training the model +\item \code{support} logical vector for each feature matching the same return from +\link{predict.textmodel_affinity} +} +} \description{ -Computes the influence of features on scaled \link{textmodel_affinity} +Computes the influence of features on scaled \code{\link[=textmodel_affinity]{textmodel_affinity()}} applications. 
} \examples{ -tmot <- textmodel_affinity(quanteda::data_dfm_lbgexample, y = c("L", NA, NA, NA, "R", NA)) -pred <- predict(tmot) +tmod <- textmodel_affinity(quanteda::data_dfm_lbgexample, y = c("L", NA, NA, NA, "R", NA)) +pred <- predict(tmod) influence(pred) } \seealso{ diff --git a/man/message_error.Rd b/man/message_error.Rd deleted file mode 100644 index f69048a..0000000 --- a/man/message_error.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{message_error} -\alias{message_error} -\title{Return an error message} -\usage{ -message_error(key = NULL) -} -\arguments{ -\item{key}{type of error message} -} -\description{ -Return an error message -} -\keyword{internal} diff --git a/man/predict.textmodel_affinity.Rd b/man/predict.textmodel_affinity.Rd index 94d819d..57e260d 100644 --- a/man/predict.textmodel_affinity.Rd +++ b/man/predict.textmodel_affinity.Rd @@ -28,7 +28,22 @@ } \value{ \code{predict()} returns a list of predicted affinity textmodel -quantities. 
+quantities, containing: +\itemize{ +\item \code{coefficients} a numeric matrix of affinity estimates (coefficients) for +each class (columns) for each document (rows) +\item \code{se} a numeric matrix of likelihood standard errors for affinity coefficients +for each class (columns) for each document (rows) +\item \code{cov} an array of covariance matrices for each affinity class, one per document +\item \code{smooth} a numeric vector of length two for the smoothing parameters \code{smooth} +and \code{ref_smooth} from \code{\link[=textmodel_affinity]{textmodel_affinity()}} +\item \code{newdata} a \link[quanteda:dfm]{dfm} on which prediction has been made +\item \code{train} a logical vector indicating which documents were used in training the model +\item \code{level} the confidence level for computing standard errors +\item \code{p} the \code{p} return from \code{textmodel_affinity} +\item \code{support} logical vector indicating whether a feature was included in computing +class affinities +} \code{coef()} returns a document \eqn{\times} class matrix of class affinities for each document. @@ -36,7 +51,7 @@ affinities for each document. \code{residuals()} returns a document-by-feature matrix of residuals. \code{resid()} is an alias. -\code{rstandard()} is a shortcut to return the pearson residuals. +\code{rstandard()} is a shortcut to return the Pearson residuals. 
} \description{ Estimate \eqn{\theta_i} for each document, from a fitted diff --git a/man/print.coefficients_textmodel.Rd b/man/print.coefficients_textmodel.Rd index 83cf0fe..8f34e28 100644 --- a/man/print.coefficients_textmodel.Rd +++ b/man/print.coefficients_textmodel.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/textmodel-methods.R \name{print.coefficients_textmodel} \alias{print.coefficients_textmodel} -\title{Print methods for textmodel features estimates -This is a helper function used in \code{print.summary.textmodel}.} +\title{Print methods for textmodel features estimates} \usage{ \method{print}{coefficients_textmodel}(x, digits = max(3L, getOption("digits") - 3L), ...) } @@ -16,7 +15,6 @@ This is a helper function used in \code{print.summary.textmodel}.} \item{...}{additional arguments not used} } \description{ -Print methods for textmodel features estimates This is a helper function used in \code{print.summary.textmodel}. } \keyword{internal} diff --git a/man/summary.textmodel_lr.Rd b/man/summary.textmodel_lr.Rd index adadfb6..10cc3f1 100644 --- a/man/summary.textmodel_lr.Rd +++ b/man/summary.textmodel_lr.Rd @@ -13,6 +13,11 @@ \item{...}{additional arguments not used} } +\value{ +a \code{summary.textmodel} classed list containing elements from the +call to \code{textmodel_lr()}, including the call, statistics for lambda, and +the estimated feature scores +} \description{ summary method for textmodel_lr objects } diff --git a/man/summary.textmodel_nb.Rd b/man/summary.textmodel_nb.Rd index 73f8245..55bd680 100644 --- a/man/summary.textmodel_nb.Rd +++ b/man/summary.textmodel_nb.Rd @@ -13,6 +13,10 @@ \item{...}{additional arguments not used} } +\value{ +a \code{summary.textmodel} classed list containing the call, the class +priors, and the estimated feature scores +} \description{ summary method for textmodel_nb objects } diff --git a/man/summary.textmodel_svm.Rd b/man/summary.textmodel_svm.Rd index e70dd3b..8edcbe1 100644 --- 
a/man/summary.textmodel_svm.Rd +++ b/man/summary.textmodel_svm.Rd @@ -13,6 +13,10 @@ \item{...}{additional arguments not used} } +\value{ +a \code{summary.textmodel} classed list containing the call and the +estimated feature scores +} \description{ summary method for textmodel_svm objects } diff --git a/man/summary.textmodel_svmlin.Rd b/man/summary.textmodel_svmlin.Rd index 21d747b..27b38fe 100644 --- a/man/summary.textmodel_svmlin.Rd +++ b/man/summary.textmodel_svmlin.Rd @@ -13,6 +13,10 @@ \item{...}{additional arguments not used} } +\value{ +a \code{summary.textmodel} classed list containing the call and the +estimated feature scores +} \description{ summary method for textmodel_svmlin objects } diff --git a/man/summary.textmodel_wordfish.Rd b/man/summary.textmodel_wordfish.Rd index fd9bf3d..89cd930 100644 --- a/man/summary.textmodel_wordfish.Rd +++ b/man/summary.textmodel_wordfish.Rd @@ -13,6 +13,10 @@ \item{...}{unused} } +\value{ +a \code{summary.textmodel} classed list containing the call, the +estimated document positions, and the estimated feature scores +} \description{ summary method for textmodel_wordfish } diff --git a/man/textmodel_affinity-internal.Rd b/man/textmodel_affinity-internal.Rd index 752af3a..6a6ece4 100644 --- a/man/textmodel_affinity-internal.Rd +++ b/man/textmodel_affinity-internal.Rd @@ -16,6 +16,27 @@ \arguments{ \item{n}{how many coefficients to print before truncating} } +\value{ +\code{summary.influence.predict.textmodel_affinity()} returns a list +classed as \code{summary.influence.predict.textmodel_affinity} that includes: +\itemize{ +\item \code{word} the feature name +\item \code{count} the total counts of each feature for which influence was computed +\item \code{mean}, \code{median}, \code{sd}, \code{max} mean, median, standard deviation, and maximum +values of influence for each feature, computed across classes +\item \code{direction} an integer vector of 1 or 2 indicating the class which the feature +is influencing +\item 
\code{rate} a document by feature class sparse matrix of normalised influence +measures +\item \code{count} a vector of counts of each non-zero feature in the input matrix +\item \code{rate} the median of \code{rate} from \code{\link[=influence.predict.textmodel_affinity]{influence.predict.textmodel_affinity()}} +\item \code{support} logical vector for each feature matching the same return from +\code{\link[=predict.textmodel_affinity]{predict.textmodel_affinity()}} +} + +the mean, the standard deviation, the direction of the influence, the rate, +and the support +} \description{ Internal print and summary methods for derivative \link{textmodel_affinity} objects. diff --git a/man/textmodel_affinity.Rd b/man/textmodel_affinity.Rd index 6069869..42583a1 100644 --- a/man/textmodel_affinity.Rd +++ b/man/textmodel_affinity.Rd @@ -32,8 +32,21 @@ defaults to 0.5} \item{verbose}{logical; if \code{TRUE} print diagnostic information during fitting.} } +\value{ +A \code{textmodel_affinity} class list object, with elements: +\itemize{ +\item \code{smooth} a numeric vector of length two for the smoothing parameters \code{smooth} +and \code{ref_smooth} +\item \code{x} the input model matrix \code{x} +\item \code{y} the vector of class training labels \code{y} +\item \code{p} a feature \eqn{\times} class sparse matrix of estimated class affinities +\item \code{support} logical vector indicating whether a feature was included in computing +class affinities +\item \code{call} the model call +} +} \description{ -\code{textmodel_affinity} implements the maximum likelihood supervised text +\code{textmodel_affinity()} implements the maximum likelihood supervised text scaling method described in Perry and Benoit (2017). } \examples{ @@ -54,7 +67,7 @@ the Class Affinity Model. 
} \seealso{ \code{\link[=predict.textmodel_affinity]{predict.textmodel_affinity()}} for methods of applying a -fitted \link{textmodel_affinity} model object to predict quantities from +fitted \code{\link[=textmodel_affinity]{textmodel_affinity()}} model object to predict quantities from (other) documents. } \author{ diff --git a/man/textmodel_lr.Rd b/man/textmodel_lr.Rd index dc9a793..49b980b 100644 --- a/man/textmodel_lr.Rd +++ b/man/textmodel_lr.Rd @@ -16,6 +16,18 @@ factors.)} \item{...}{additional arguments passed to \code{\link[glmnet:cv.glmnet]{cv.glmnet()}}} } +\value{ +an object of class \code{textmodel_lr}, a list containing: +\itemize{ +\item \code{x}, \code{y} the input model matrix and input training class labels +\item \code{algorithm} character; the type and family of logistic regression model used in calling +\code{\link[glmnet:cv.glmnet]{cv.glmnet()}} +\item \code{type} the type associated with \code{algorithm} +\item \code{classnames} the levels of training classes in \code{y} +\item \code{lrfitted} the fitted model object from \code{\link[glmnet:cv.glmnet]{cv.glmnet()}} +\item \code{call} the model call +} +} \description{ Fits a fast penalized maximum likelihood estimator to predict discrete categories from sparse \link[quanteda:dfm]{dfm} objects. 
Using the \pkg{glmnet} diff --git a/man/textmodel_lsa.Rd b/man/textmodel_lsa.Rd index 1e0a53e..14181c8 100644 --- a/man/textmodel_lsa.Rd +++ b/man/textmodel_lsa.Rd @@ -13,6 +13,16 @@ textmodel_lsa(x, nd = 10, margin = c("both", "documents", "features")) \item{margin}{margin to be smoothed by the SVD} } +\value{ +a \code{textmodel_lsa} class object, a list containing: +\itemize{ +\item \code{sk} a numeric vector containing the d values from the SVD +\item \code{docs} document coordinates from the SVD (u) +\item \code{features} feature coordinates from the SVD (v) +\item \code{matrix_low_rank} the multiplication of udv' +\item \code{data} the input data as a CSparseMatrix from the \pkg{Matrix} package +} +} \description{ Fit the Latent Semantic Analysis scaling model to a \link{dfm}, which may be weighted (for instance using \code{\link[quanteda:dfm_tfidf]{quanteda::dfm_tfidf()}}). diff --git a/man/textmodel_svm.Rd b/man/textmodel_svm.Rd index a9075cb..e8523d0 100644 --- a/man/textmodel_svm.Rd +++ b/man/textmodel_svm.Rd @@ -32,6 +32,19 @@ vector classification (dual)} \item{...}{additional arguments passed to \code{\link[LiblineaR:LiblineaR]{LiblineaR::LiblineaR()}}} } +\value{ +an object of class \code{textmodel_svm}, a list containing: +\itemize{ +\item \code{x}, \code{y}, \code{weights}, \code{type}: argument values from the call parameters +\item \code{algorithm} character label of the algorithm used in the call to +\code{\link[LiblineaR:LiblineaR]{LiblineaR::LiblineaR()}} +\item \code{classnames} levels of \code{y} +\item \code{bias} the value of \code{Bias} returned from \code{\link[LiblineaR:LiblineaR]{LiblineaR::LiblineaR()}} +\item \code{svmlinfitted} the fitted model object passed from the call to +\code{\link[LiblineaR:LiblineaR]{LiblineaR::LiblineaR()}} +\item \code{call} the model call +} +} \description{ Fit a fast linear SVM classifier for texts, using the \pkg{LiblineaR} package. 
diff --git a/man/textplot_influence.Rd b/man/textplot_influence.Rd index bb8f2f3..0795e61 100644 --- a/man/textplot_influence.Rd +++ b/man/textplot_influence.Rd @@ -14,6 +14,11 @@ fitted or predicted scaling model object to be plotted} \item{...}{additional arguments passed to \code{\link[=plot]{plot()}}} } +\value{ +Creates a base R plot of feature influences of the median influence +by the log10 median rate of the feature, and invisibly returns the elements +from the call to \code{\link[=plot]{plot()}}. +} \description{ Plot the results of a fitted scaling model, from (e.g.) a predicted \link{textmodel_affinity} model. diff --git a/man/unused_dots.Rd b/man/unused_dots.Rd deleted file mode 100644 index 858f25b..0000000 --- a/man/unused_dots.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{unused_dots} -\alias{unused_dots} -\title{Raise warning of unused dots} -\usage{ -unused_dots(...) -} -\arguments{ -\item{...}{dots to check} -} -\description{ -Raise warning of unused dots -} -\keyword{internal} diff --git a/vignettes/textmodel_performance.Rmd b/vignettes/textmodel_performance.Rmd index 57c94b0..60b67d2 100644 --- a/vignettes/textmodel_performance.Rmd +++ b/vignettes/textmodel_performance.Rmd @@ -16,8 +16,8 @@ knitr::opts_chunk$set( ``` ```{r setup} -library("quanteda.textmodels") library("quanteda") +library("quanteda.textmodels") ``` ## Naive Bayes @@ -81,7 +81,7 @@ microbenchmark( ) ``` -And Bernoulli. Note here that while we are supplying the boolean matrix to `textmodel_nb()`, this re-weighting from the count matrix would have been performed automatically within the function had we not done so in advance - it's done here just for comparison. +And Bernoulli. 
Note here that while we are supplying the Boolean matrix to `textmodel_nb()`, this re-weighting from the count matrix would have been performed automatically within the function had we not done so in advance - it's done here just for comparison. ```{r} dfmat_train_bern <- dfm_weight(dfmat_train, scheme = "boolean") dfmat_test_bern <- dfm_weight(dfmat_test, scheme = "boolean") @@ -113,5 +113,5 @@ https://CRAN.R-project.org/package=naivebayes>. Date: 2020-03-08. Manning, Christopher D., Prabhakar Raghavan, and Hinrich Schütze (2008). _Introduction to Information Retrieval_. Cambridge University Press. -Skogholt, Martin (2020). _fastNaiveBayes: Extremely Fast Implementation of a Naive Bayes Classifier_. R package version 2.2.0. - https://github.com/mskogholt/fastNaiveBayes. Date: 2020-02-23. +Skogholt, Martin (2020). _fastNaiveBayes: Extremely Fast Implementation of a Naive Bayes Classifier_. R package version 2.2.1. + https://github.com/mskogholt/fastNaiveBayes. Date: 2020-05-04.