Skip to content

Commit

Permalink
Merge pull request #67 from quanteda/issue-66
Browse files Browse the repository at this point in the history
Issue 66
  • Loading branch information
kbenoit authored Nov 7, 2023
2 parents 58a1f03 + 5d91d30 commit 68a8489
Show file tree
Hide file tree
Showing 13 changed files with 100 additions and 172 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
covr::codecov(
quiet = FALSE,
clean = FALSE,
install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
)
shell: Rscript {0}

Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: quanteda.textstats
Version: 0.96.4
Version: 0.96.5
Title: Textual Statistics for the Quantitative Analysis of Textual Data
Description: Textual statistics functions formerly in the 'quanteda' package.
Textual statistics for characterizing and comparing textual data. Includes
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Fixes for C++ header compatibility for existing **quanteda** 3.x and the forthcoming 4.0 version.
* Fixes for compatibility with Matrix >= 1.5. (#54)
* Fixed how subsetting (`[`) works for textstat outputs, to fix #50.
* Updated the C++ code generally, and improved how it calls the TBB library for multi-threading.

# quanteda.textstats 0.95

Expand Down
8 changes: 4 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

qatd_cpp_collocations <- function(texts_, types_, words_ignore_, count_min, sizes_, method, smoothing) {
.Call(`_quanteda_textstats_qatd_cpp_collocations`, texts_, types_, words_ignore_, count_min, sizes_, method, smoothing)
cpp_collocations <- function(texts_, types_, words_ignore_, count_min, sizes_, method, smoothing, thread = 1L) {
.Call(`_quanteda_textstats_cpp_collocations`, texts_, types_, words_ignore_, count_min, sizes_, method, smoothing, thread)
}

qatd_cpp_keyness <- function(mt, measure, correct) {
.Call(`_quanteda_textstats_qatd_cpp_keyness`, mt, measure, correct)
cpp_keyness <- function(mt, measure, correct, thread = 1L) {
.Call(`_quanteda_textstats_cpp_keyness`, mt, measure, correct, thread)
}

7 changes: 3 additions & 4 deletions R/textstat_collocations.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ textstat_collocations.tokens <- function(x, method = "lambda",
x <- as.tokens(x)
method <- match.arg(method, c("lambda"))


if (any(size == 1))
stop("Collocation sizes must be larger than 1")
if (any(size > 5))
Expand All @@ -159,9 +158,9 @@ textstat_collocations.tokens <- function(x, method = "lambda",
types <- types(x)
id_ignore <- unlist(pattern2id("^\\p{P}+$", types, "regex", FALSE), use.names = FALSE)
if (is.null(id_ignore)) id_ignore <- integer()
result <- qatd_cpp_collocations(x, types, id_ignore, min_count, size,
if (method == "lambda1") "lambda1" else "lambda",
smoothing)
result <- cpp_collocations(x, types, id_ignore, min_count, size,
if (method == "lambda1") "lambda1" else "lambda",
smoothing, get_threads())

# compute z for lambda methods
result$z <- result$lambda / result$sigma
Expand Down
6 changes: 3 additions & 3 deletions R/textstat_keyness.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ textstat_keyness.dfm <- function(x, target = 1L, measure = c("chi2", "exact", "l
label <- c("target", "reference")
}
}
grouping <- factor(target, levels = c(TRUE, FALSE), labels = label)
temp <- dfm_group(x, groups = grouping)
g <- factor(target, levels = c(TRUE, FALSE), labels = label)
temp <- dfm_group(x, groups = g)

if (measure == "exact") {
if (measure == "exact" && !correction %in% c("default", "none"))
Expand All @@ -148,7 +148,7 @@ textstat_keyness.dfm <- function(x, target = 1L, measure = c("chi2", "exact", "l
warning("correction is always none for pmi")
result <- data.frame(
feature = featnames(temp),
stat = qatd_cpp_keyness(temp, measure, correction),
stat = cpp_keyness(temp, measure, correction, get_threads()),
p = NA,
n_target = as.vector(temp[1, ]),
n_reference = as.vector(temp[2, ]),
Expand Down
8 changes: 8 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ check_dots <- function(..., method = NULL) {
warning(arg, " argument is not used.", call. = FALSE)
}
}

# Read the thread-count setting used when calling the C++ routines.
#
# Looks up the "quanteda_threads" option, falling back to -1L when the option
# is not set, and validates the value before returning it.
#
# Returns: a single, non-missing integer.
# Errors: if the option is not an integer vector of length one, or is NA.
get_threads <- function() {
    value <- getOption("quanteda_threads", -1L)
    # is.integer(NA_integer_) is TRUE, so a missing value has to be rejected
    # explicitly; otherwise it would pass validation and be returned as-is.
    if (!is.integer(value) || length(value) != 1L || is.na(value))
        stop("Invalid value of threads in quanteda options")
    value
}

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

[![CRAN
Version](https://www.r-pkg.org/badges/version/quanteda.textstats)](https://CRAN.R-project.org/package=quanteda.textstats)
[![](https://img.shields.io/badge/devel%20version-0.96.4-royalblue.svg)](https://github.com/quanteda/quanteda.textstats)
[![](https://img.shields.io/badge/devel%20version-0.96.5-royalblue.svg)](https://github.com/quanteda/quanteda.textstats)
[![Downloads](https://cranlogs.r-pkg.org/badges/quanteda.textstats)](https://CRAN.R-project.org/package=quanteda.textstats)
[![Total
Downloads](https://cranlogs.r-pkg.org/badges/grand-total/quanteda.textstats?color=orange)](https://CRAN.R-project.org/package=quanteda.textstats)
Expand Down
5 changes: 1 addition & 4 deletions cran-comments.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@

Purpose:

* To fix a C++ issue with TBB that was causing an installation failure on
downstream packages on ubuntu-devel environments.
* To remove some C++ code for similarity computations that is not needed
because it is contained in the **proxy** library.
* To update the C++ code to better call the tbb library for parallel computing.

## Test environments

Expand Down
5 changes: 1 addition & 4 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ Zeitschrift
Zusammenhang
anderen
angewandte
codecov
cruft
des
deutsche
Expand All @@ -134,18 +133,15 @@ dfm's
dfms
dist
docvars
doi
eines
für
gfortran
hamann
hamman
io
keyness
koRpus
macOS
nsyllable
org
proxyC
puncts
quanteda
Expand All @@ -154,6 +150,7 @@ rix
sents
simil
sy
tbb
textstat
textstats
th
Expand Down
22 changes: 12 additions & 10 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ Rcpp::Rostream<true>& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// qatd_cpp_collocations
DataFrame qatd_cpp_collocations(const List& texts_, const CharacterVector& types_, const IntegerVector& words_ignore_, const unsigned int count_min, const IntegerVector sizes_, const String& method, const double smoothing);
RcppExport SEXP _quanteda_textstats_qatd_cpp_collocations(SEXP texts_SEXP, SEXP types_SEXP, SEXP words_ignore_SEXP, SEXP count_minSEXP, SEXP sizes_SEXP, SEXP methodSEXP, SEXP smoothingSEXP) {
// cpp_collocations
DataFrame cpp_collocations(const List& texts_, const CharacterVector& types_, const IntegerVector& words_ignore_, const unsigned int count_min, const IntegerVector sizes_, const String& method, const double smoothing, const int thread);
RcppExport SEXP _quanteda_textstats_cpp_collocations(SEXP texts_SEXP, SEXP types_SEXP, SEXP words_ignore_SEXP, SEXP count_minSEXP, SEXP sizes_SEXP, SEXP methodSEXP, SEXP smoothingSEXP, SEXP threadSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Expand All @@ -24,27 +24,29 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< const IntegerVector >::type sizes_(sizes_SEXP);
Rcpp::traits::input_parameter< const String& >::type method(methodSEXP);
Rcpp::traits::input_parameter< const double >::type smoothing(smoothingSEXP);
rcpp_result_gen = Rcpp::wrap(qatd_cpp_collocations(texts_, types_, words_ignore_, count_min, sizes_, method, smoothing));
Rcpp::traits::input_parameter< const int >::type thread(threadSEXP);
rcpp_result_gen = Rcpp::wrap(cpp_collocations(texts_, types_, words_ignore_, count_min, sizes_, method, smoothing, thread));
return rcpp_result_gen;
END_RCPP
}
// qatd_cpp_keyness
Rcpp::NumericVector qatd_cpp_keyness(arma::sp_mat& mt, const std::string measure, const std::string correct);
RcppExport SEXP _quanteda_textstats_qatd_cpp_keyness(SEXP mtSEXP, SEXP measureSEXP, SEXP correctSEXP) {
// cpp_keyness
Rcpp::NumericVector cpp_keyness(arma::sp_mat& mt, const std::string measure, const std::string correct, const int thread);
RcppExport SEXP _quanteda_textstats_cpp_keyness(SEXP mtSEXP, SEXP measureSEXP, SEXP correctSEXP, SEXP threadSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< arma::sp_mat& >::type mt(mtSEXP);
Rcpp::traits::input_parameter< const std::string >::type measure(measureSEXP);
Rcpp::traits::input_parameter< const std::string >::type correct(correctSEXP);
rcpp_result_gen = Rcpp::wrap(qatd_cpp_keyness(mt, measure, correct));
Rcpp::traits::input_parameter< const int >::type thread(threadSEXP);
rcpp_result_gen = Rcpp::wrap(cpp_keyness(mt, measure, correct, thread));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_quanteda_textstats_qatd_cpp_collocations", (DL_FUNC) &_quanteda_textstats_qatd_cpp_collocations, 7},
{"_quanteda_textstats_qatd_cpp_keyness", (DL_FUNC) &_quanteda_textstats_qatd_cpp_keyness, 3},
{"_quanteda_textstats_cpp_collocations", (DL_FUNC) &_quanteda_textstats_cpp_collocations, 8},
{"_quanteda_textstats_cpp_keyness", (DL_FUNC) &_quanteda_textstats_cpp_keyness, 4},
{NULL, NULL, 0}
};

Expand Down
Loading

0 comments on commit 68a8489

Please sign in to comment.