Skip to content

Commit

Permalink
Remove custom subset (#83)
Browse files Browse the repository at this point in the history
* Added package anchors in man pages for ngCMatrix-class.

* Removed internal ngCMatrix subsetting code which has issues with R-devel. We use now subsetting provided by package Matrix which is almost as fast.

* Removed internal code for rowSums and colSums for ngCMatrix.

* Internal code for t for ngCMatrix is now only used internally.

* Still export deprecated functions used by arulesSequences.
  • Loading branch information
mhahsler authored Aug 20, 2024
1 parent 1ff499a commit 9ed870e
Show file tree
Hide file tree
Showing 20 changed files with 114 additions and 97 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Copyright: The source code for Apriori and Eclat was obtained from
Buchta, Bettina Gruen and Kurt Hornik.
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Collate:
'AAADefs.R'
'AAA_arules-package.R'
Expand Down
16 changes: 13 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
# arules 1.7-7-1 (xx/xx/2024)

## Bugfixes
* Fixed 'Error in .basicRuleMeasure(..) unused argument
(complement = TRUE)' reported by bachnguyen-tomo.

## Changes
* Updated tests for testthat edition 3.

## Internal Changes
* Removed internal ngCMatrix subsetting code, which has issues with R-devel.
We now use the subsetting provided by package Matrix, which is almost as fast.
* Removed internal code for rowSums and colSums for ngCMatrix.
* Internal code for t for ngCMatrix is now only used internally.



## Bugfixes
* Fixed 'Error in .basicRuleMeasure(..) unused argument
(complement = TRUE)' reported by bachnguyen-tomo.
* Added package anchors in man pages for ngCMatrix-class.

# arules 1.7-7 (11/28/2023)

## Changes
Expand Down
10 changes: 0 additions & 10 deletions R/Matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,8 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.



## provide some interfaces to our C implementations

## t in Matrix is now faster
##setMethod("t", signature(x = "ngCMatrix"),
## function(x) .Call(R_transpose_ngCMatrix, x))

## overloading of [ for ngCMatrix cannot be accomplished
## easily as there are too many signatures to overload.


## density for ngC/dgCMatrix
.density_Matrix <- function(x) {
  ## Number of stored entries: the `i` slot is read directly, so `x` must be
  ## an S4 object that has such a slot (ngCMatrix/dgCMatrix per the header
  ## comment above).
  n_stored <- length(x@i)

  ## Total number of cells in the matrix (product of all dimensions).
  n_cells <- prod(dim(x))

  ## Density = stored entries relative to all cells.
  n_stored / n_cells
}
Expand Down
13 changes: 4 additions & 9 deletions R/extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ setMethod("[", signature(
}

i <- .translate_index(i, rownames(x), nrow(x))
## faster than: x@data <- x@data[,i, drop=FALSE]
x@data <- .Call(R_colSubset_ngCMatrix, x@data, i)
x@data <- x@data[,i, drop=FALSE]

### only subset if we have rows
if (nrow(x@itemsetInfo))
Expand All @@ -100,9 +99,7 @@ setMethod("[", signature(
}

j <- .translate_index(j, colnames(x), ncol(x))
## faster than: x@data <- x@data[j,, drop=FALSE]
x@data <- .Call(R_rowSubset_ngCMatrix, x@data, j)

x@data <- x@data[j,, drop=FALSE]
x@itemInfo <- x@itemInfo[j, , drop = FALSE]
}

Expand Down Expand Up @@ -159,8 +156,7 @@ setMethod("[", signature(
}

i <- .translate_index(i, rownames(x), nrow(x))
x@data <- .Call(R_colSubset_ngCMatrix, x@data, i)

x@data <- x@data[,i , drop=FALSE]
x@itemInfo <- x@itemInfo[i, , drop = FALSE]
}

Expand All @@ -174,8 +170,7 @@ setMethod("[", signature(
}

j <- .translate_index(j, colnames(x), ncol(x))
x@data <- .Call(R_rowSubset_ngCMatrix, x@data, j)

x@data <- x@data[j ,, drop=FALSE]
x@transactionInfo <- x@transactionInfo[j, , drop = FALSE]
}

Expand Down
3 changes: 0 additions & 3 deletions R/is.generator.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@
## support(l') >= support(l) so min(support(l' in all subsets of l)) != support(l)





#' Find Generator Itemsets
#'
#' Provides the generic function and the method `is.generator()` for
Expand Down
4 changes: 2 additions & 2 deletions R/is.superset.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@
#' @param x,y associations or itemMatrix objects. If `y = NULL`, the super
#' or subset structure within set `x` is calculated.
#' @param proper a logical indicating if all or just proper super or subsets.
#' @param sparse a logical indicating if a sparse [`ngCMatrix-class`]
#' @param sparse a logical indicating if a sparse [Matrix::ngCMatrix-class]
#' rather than a dense logical matrix should be returned.
#' Sparse computation requires a
#' significantly smaller amount of memory and is much faster for large sets.
#' @param ... currently unused.
#' @return returns a logical matrix or a sparse [`ngCMatrix-class`]
#' @return returns a logical matrix or a sparse [Matrix::ngCMatrix-class]
#' with `length(x)` rows and `length(y)` columns.
#' Each logical row vector represents which elements in `y` are supersets
#' (subsets) of the corresponding element in `x`. If either `x` or
Expand Down
2 changes: 1 addition & 1 deletion R/itemFrequency.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ setMethod("itemFrequency", signature(x = "itemMatrix"),
## we could also use rowSums
##support <- tabulate(x@data@i + 1L, nbins = x@data@Dim[1])

support <- .Call(R_rowSums_ngCMatrix, x@data)
support <- rowSums(x@data)
total <- length(x)
}

Expand Down
4 changes: 2 additions & 2 deletions R/itemMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#'
#' **Warning:** Ideally, we would store the matrix as a row-oriented sparse
#' matrix (`ngRMatrix`), but the \pkg{Matrix} package provides better support for
#' column-oriented sparse classes (`ngCMatrix`). The matrix is therefore internally stored
#' column-oriented sparse classes ([Matrix::ngCMatrix-class]). The matrix is therefore internally stored
#' in transposed form.
#'
#' **Working with several `itemMatrix` objects**
Expand All @@ -48,7 +48,7 @@
#' @aliases itemMatrix
#' @family itemMatrix and transactions functions
#'
#' @slot data a sparse matrix of class [ngCMatrix-class] representing the itemsets.
#' @slot data a sparse matrix of class [Matrix::ngCMatrix-class] representing the itemsets.
#' **Warning:** the matrix is stored in transposed form for efficiency reasons!
#' @slot itemInfo a data.frame
#' @slot itemsetInfo a data.frame
Expand Down
1 change: 1 addition & 0 deletions R/setsItemwise.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ setMethod("itemUnion", signature(x = "itemMatrix", y = "itemMatrix"),
stop("Length mismatch between x and y!")

### the C code does not deal well with a large number of dense rules.
## FIXME: remove from src
#x@data <- .Call(R_or_ngCMatrix, x@data, y@data)

x@data <- as(x@data | y@data, "nsparseMatrix")
Expand Down
21 changes: 8 additions & 13 deletions R/size.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,27 +44,22 @@
setGeneric("size",
function(x, ...) standardGeneric("size"))

## FIXME: Add transactionID or itemsetID as names?

#' @rdname size
setMethod("size", signature(x = "itemMatrix"),
function(x) {
## if Matrix had colSums implemented efficiently,
## we could use colSums(x@data). we use our own C code.
## diff(x@data@p) is nearly as fast as colSums(x@data).

## FIXME: Add transactionID or itemsetID as names
cnt <- .Call(R_colSums_ngCMatrix, x@data)
cnt
})
function(x) colSums(x@data)
)

#' @rdname size
setMethod("size", signature(x = "tidLists"),
function(x)
.Call(R_colSums_ngCMatrix, x@data))
function(x) colSums(x@data)
)

#' @rdname size
setMethod("size", signature(x = "itemsets"),
function(x)
size(x@items))
function(x) colSums(x@items@data)
)

#' @rdname size
setMethod("size", signature(x = "rules"),
Expand Down
4 changes: 2 additions & 2 deletions R/tidLists.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#' item/itemset.
#'
#' `tidLists` uses the class
#' [ngCMatrix-class] to efficiently store the
#' [Matrix::ngCMatrix-class] to efficiently store the
#' transaction ID lists as a sparse matrix. Each column in the matrix
#' represents one transaction ID list.
#'
Expand Down Expand Up @@ -63,7 +63,7 @@
#' an object of class [transactions].
#' * by calls of the form `new("tidLists", ...)`.
#'
#' @slot data an object of class [ngCMatrix-class] from package \pkg{Matrix}.
#' @slot data an object of class [Matrix::ngCMatrix-class].
#' @slot itemInfo a data.frame
#' @slot transactionInfo a data.frame
#'
Expand Down
4 changes: 3 additions & 1 deletion R/warm.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,9 @@ weclat <- function(data,
cat("\n")
}
## r <- .Call(R_transpose_ngCMatrix, data@data)
r <- selectMethod("t", class(data@data))(data@data)
##r <- selectMethod("t", class(data@data))(data@data)
r <- t(data@data)

r <- .Call(
R_weclat_ngCMatrix,
r,
Expand Down
4 changes: 2 additions & 2 deletions man/is.superset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/itemMatrix-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/tidLists-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/arch64.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*----------------------------------------------------------------------
Detect 64 bit architecure
Detect 64 bit architecture
----------------------------------------------------------------------*/

/*
Expand Down
68 changes: 51 additions & 17 deletions src/arrayIndex.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,49 @@
#include <R.h>
#include <Rinternals.h>

/* workaround i18n */
#define _(x) (x)
/* DEPRECATED (8/20/24)
* This file is deprecated and will be
* removed. Currently used to export functions used by arulesSequences
*/
/*
* copied from 2.14-2 src/main/subsript.c
*
* ceeboo 2012/11
*/
// adapted from 4.4.1 src/main/match.c
//
// ceeboo 2024/8
//
static Rboolean nonNullStringMatch(SEXP s, SEXP t)
{
    /*
     * Compare two CHARSXPs for equality, where NA and the empty string
     * never match anything (not even themselves).
     *
     * Returns TRUE only if both strings are non-NA, non-empty and equal.
     */

    /* NA on either side: no match. */
    if (s == NA_STRING || t == NA_STRING)
        return FALSE;

    /* "" on either side: no match. */
    if (!CHAR(s)[0] || !CHAR(t)[0])
        return FALSE;

    /* Same CHARSXP pointer: trivially equal. */
    if (s == t)
        return TRUE;

    /* Pointer comparison does not suffice only when the two strings are
       in different encodings (which must then be non-ASCII strings); one
       of them may be marked as unknown.  Look at the encodings next. */
    cetype_t enc_s = getCharCE(s);
    cetype_t enc_t = getCharCE(t);

    /* Byte strings are only comparable with other byte strings, and then
       byte-by-byte. */
    if (enc_s == CE_BYTES || enc_t == CE_BYTES) {
        if (enc_s == CE_BYTES && enc_t == CE_BYTES)
            return strcmp(CHAR(s), CHAR(t)) == 0 ? TRUE : FALSE;
        return FALSE;
    }

    /* Same encoding but different pointers: treated as different. */
    if (enc_s == enc_t)
        return FALSE;

    /* A native-encoded string is never matched against one in another
       encoding.
       NOTE(review): this skips the UTF-8 translation below whenever one
       side is native -- confirm that this matches the intended semantics
       of the R function this was adapted from. */
    if (enc_s == CE_NATIVE || enc_t == CE_NATIVE)
        return FALSE;

    /* Remaining case: two different known encodings.  Compare the UTF-8
       translations, then discard any memory used by translateCharUTF8. */
    void *vmax = vmaxget();
    int differ = strcmp(translateCharUTF8(s), translateCharUTF8(t));
    vmaxset(vmax);

    return differ ? FALSE : TRUE;
}

// workaround i18n
#define _(x) (x)

// copied from 2.14-2 src/main/subscript.c
//
// ceeboo 2011/11 2014/1
//
#define ECALL(call, yy) if(call == R_NilValue) error(yy); else errorcall(call, yy);

static SEXP nullSubscript(int n)
Expand Down Expand Up @@ -179,7 +213,7 @@ stringSubscript(SEXP s, int ns, int nx, SEXP names,
if (!in && TYPEOF(names_j) != CHARSXP) {
ECALL(call, _("character vector element does not have type CHARSXP"));
}
if (NonNullStringMatch(STRING_ELT(s, i), names_j)) {
if (nonNullStringMatch(STRING_ELT(s, i), names_j)) {
sub = j + 1;
SET_VECTOR_ELT(indexnames, i, R_NilValue);
break;
Expand All @@ -195,7 +229,7 @@ stringSubscript(SEXP s, int ns, int nx, SEXP names,
sub = INTEGER(indx)[i];
if (sub == 0) {
for (j = 0 ; j < i ; j++)
if (NonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) {
if (nonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) {
sub = INTEGER(indx)[j];
SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, j));
break;
Expand Down Expand Up @@ -224,14 +258,14 @@ stringSubscript(SEXP s, int ns, int nx, SEXP names,
/* Array Subscripts.
dim is the dimension (0 to k-1)
s is the subscript list,
dg is the attribute name of dim
dng is the attribute name of dimnames
dn is the attribute name of dim
dnn is the attribute name of dimnames
x is the array to be subscripted.
*/

SEXP
_int_arraySubscript(int dim, SEXP s, const char *dn, const char *dnn,
SEXP x, Rboolean in, SEXP call)
_int_array_subscript(int dim, SEXP s, const char *dn, const char *dnn,
SEXP x, Rboolean in, SEXP call)
{
int nd, ns, stretch = 0;
SEXP dnames, tmp;
Expand Down Expand Up @@ -270,14 +304,14 @@ _int_arraySubscript(int dim, SEXP s, const char *dn, const char *dnn,
return R_NilValue;
}

/* R interface */
// R interface
SEXP
R_arraySubscript(SEXP x, SEXP dim, SEXP s, SEXP dn, SEXP dnn) {
/* FIXME */
return _int_arraySubscript(INTEGER(dim)[0], s,
// FIXME
return _int_array_subscript(INTEGER(dim)[0], s,
(const char *) CHAR(STRING_ELT(dn, 0)),
(const char *) CHAR(STRING_ELT(dnn, 0)),
x, TRUE, R_NilValue);
}


//
Loading

0 comments on commit 9ed870e

Please sign in to comment.