Remove custom subset (#83)

* Added package anchors in man pages for ngCMatrix-class. * Removed internal ngCMatrix subsetting code which has issues with R-devel. We now use subsetting provided by package Matrix which is almost as fast. * Removed internal code for rowSums and colSums for ngCMatrix. * Internal code for t for ngCMatrix is now only used internally. * Still export deprecated functions used by arulesSequences.
mhahsler · Aug 20, 2024 · 9ed870e · 9ed870e
1 parent 1ff499a
commit 9ed870e
Show file tree

Hide file tree

Showing 20 changed files with 114 additions and 97 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -44,7 +44,7 @@ Copyright: The source code for Apriori and Eclat was obtained from
     Buchta, Bettina Gruen and Kurt Hornik.
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Collate: 
     'AAADefs.R'
     'AAA_arules-package.R'

diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,22 @@
 # arules 1.7-7-1 (xx/xx/2024)
 
-## Bugfixes
-* Fixed 'Error in .basicRuleMeasure(..) unused argument 
-  (complement = TRUE)' reported by bachnguyen-tomo.
 
 ## Changes
 * Updated tests for testthat edition 3.
 
+## Internal Changes
+* Removed internal ngCMatrix subsetting code which has issues with R-devel.
+  We now use subsetting provided by package Matrix which is almost as fast.
+* Removed internal code for rowSums and colSums for ngCMatrix. 
+* Internal code for t for ngCMatrix is now only used internally. 
+
+
+
+## Bugfixes
+* Fixed 'Error in .basicRuleMeasure(..) unused argument 
+  (complement = TRUE)' reported by bachnguyen-tomo.
+* Added package anchors in man pages for ngCMatrix-class.
+
 # arules 1.7-7 (11/28/2023)
 
 ## Changes

diff --git a/R/Matrix.R b/R/Matrix.R
@@ -17,18 +17,8 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
-
-
 ## provide some interfaces to our C implementations
 
-## t in Matrix is now faster
-##setMethod("t", signature(x = "ngCMatrix"),
-##    function(x) .Call(R_transpose_ngCMatrix, x))
-
-## overloading of [ for ngCMatrix cannot be accomplished
-## easily as there are too many signatures to overload.
-
-
 ## density for ngC/dgCMatrix
 .density_Matrix <- function(x)
   length(x@i) / prod(dim(x))

diff --git a/R/extract.R b/R/extract.R
@@ -76,8 +76,7 @@ setMethod("[", signature(
       }
 
       i <- .translate_index(i, rownames(x), nrow(x))
-      ## faster than: x@data <- x@data[,i, drop=FALSE]
-      x@data <- .Call(R_colSubset_ngCMatrix, x@data, i)
+      x@data <- x@data[,i, drop=FALSE]
 
       ### only subset if we have rows
       if (nrow(x@itemsetInfo))
@@ -100,9 +99,7 @@ setMethod("[", signature(
       }
 
       j <- .translate_index(j, colnames(x), ncol(x))
-      ## faster than: x@data <- x@data[j,, drop=FALSE]
-      x@data <- .Call(R_rowSubset_ngCMatrix, x@data, j)
-
+      x@data <- x@data[j,, drop=FALSE]
       x@itemInfo <- x@itemInfo[j, , drop = FALSE]
     }
 
@@ -159,8 +156,7 @@ setMethod("[", signature(
       }
 
       i <- .translate_index(i, rownames(x), nrow(x))
-      x@data <- .Call(R_colSubset_ngCMatrix, x@data, i)
-
+      x@data <- x@data[,i , drop=FALSE]
       x@itemInfo <- x@itemInfo[i, , drop = FALSE]
     }
 
@@ -174,8 +170,7 @@ setMethod("[", signature(
       }
 
       j <- .translate_index(j, colnames(x), ncol(x))
-      x@data <- .Call(R_rowSubset_ngCMatrix, x@data, j)
-
+      x@data <- x@data[j ,, drop=FALSE]
       x@transactionInfo <- x@transactionInfo[j, , drop = FALSE]
     }
 

diff --git a/R/is.generator.R b/R/is.generator.R
@@ -39,9 +39,6 @@
 ## support(l') >= support(l) so min(support(l' in all subsets of l)) != support(l)
 
 
-
-
-
 #' Find Generator Itemsets
 #'
 #' Provides the generic function and the method `is.generator()` for

diff --git a/R/is.superset.R b/R/is.superset.R
@@ -37,12 +37,12 @@
 #' @param x,y associations or itemMatrix objects. If `y = NULL`, the super
 #' or subset structure within set `x` is calculated.
 #' @param proper a logical indicating if all or just proper super or subsets.
-#' @param sparse a logical indicating if a sparse [`ngCMatrix-class`] 
+#' @param sparse a logical indicating if a sparse [Matrix::ngCMatrix-class] 
 #' rather than a dense logical matrix should be returned. 
 #' Sparse computation requires a
 #' significantly smaller amount of memory and is much faster for large sets.
 #' @param ... currently unused.
-#' @return returns a logical matrix or a sparse [`ngCMatrix-class`] 
+#' @return returns a logical matrix or a sparse [Matrix::ngCMatrix-class] 
 #' with `length(x)` rows and `length(y)` columns.
 #' Each logical row vector represents which elements in `y` are supersets
 #' (subsets) of the corresponding element in `x`.  If either `x` or

diff --git a/R/itemFrequency.R b/R/itemFrequency.R
@@ -68,7 +68,7 @@ setMethod("itemFrequency", signature(x = "itemMatrix"),
       ## we could also use rowSums
       ##support <- tabulate(x@data@i + 1L, nbins = x@data@Dim[1])
 
-      support <- .Call(R_rowSums_ngCMatrix, x@data)
+      support <- rowSums(x@data)
       total <- length(x)
     }
 

diff --git a/R/itemMatrix.R b/R/itemMatrix.R
@@ -34,7 +34,7 @@
 #' 
 #' **Warning:** Ideally, we would store the matrix as a row-oriented sparse 
 #'   matrix (`ngRMatrix`), but the \pkg{Matrix} package provides better support for
-#'   column-oriented sparse classes (`ngCMatrix`). The matrix is therefore internally stored
+#'   column-oriented sparse classes ([Matrix::ngCMatrix-class]). The matrix is therefore internally stored
 #'   in transposed form.
 #' 
 #' **Working with several `itemMatrix` objects**
@@ -48,7 +48,7 @@
 #' @aliases itemMatrix
 #' @family itemMatrix and transactions functions
 #' 
-#' @slot data a sparse matrix of class [ngCMatrix-class] representing the itemsets. 
+#' @slot data a sparse matrix of class [Matrix::ngCMatrix-class] representing the itemsets. 
 #'       **Warning:** the matrix is stored in transposed form for efficiency reasons!
 #' @slot itemInfo a data.frame
 #' @slot itemsetInfo a data.frame

diff --git a/R/setsItemwise.R b/R/setsItemwise.R
@@ -66,6 +66,7 @@ setMethod("itemUnion", signature(x = "itemMatrix", y = "itemMatrix"),
       stop("Length mismatch between x and y!")
 
     ### the C code does not deal well with a large number of dense rules.
+    ## FIXME: remove from src
     #x@data <- .Call(R_or_ngCMatrix", x@data, y@data)
 
     x@data <- as(x@data | y@data, "nsparseMatrix")

diff --git a/R/size.R b/R/size.R
@@ -44,27 +44,22 @@
 setGeneric("size",
   function(x, ...) standardGeneric("size"))
 
+## FIXME: Add transactionID or itemsetID as names?
+
 #' @rdname size
 setMethod("size", signature(x = "itemMatrix"),
-  function(x) {
-    ## if Matrix had colSums implemented efficiently,
-    ## we could use colSums(x@data). we use our own C code.
-    ## diff(x@data@p) is nearly as fast as colSums(x@data).
-
-    ## FIXME: Add transactionID or itemsetID as names
-    cnt <- .Call(R_colSums_ngCMatrix, x@data)
-    cnt
-  })
+  function(x) colSums(x@data)
+)
 
 #' @rdname size
 setMethod("size", signature(x = "tidLists"),
-  function(x)
-    .Call(R_colSums_ngCMatrix, x@data))
+  function(x) colSums(x@data)
+)
 
 #' @rdname size
 setMethod("size", signature(x = "itemsets"),
-  function(x)
-    size(x@items))
+  function(x) colSums(x@items@data)
+)
 
 #' @rdname size
 setMethod("size", signature(x = "rules"),

diff --git a/R/tidLists.R b/R/tidLists.R
@@ -27,7 +27,7 @@
 #' item/itemset. 
 #' 
 #' `tidLists` uses the class
-#' [ngCMatrix-class] to efficiently store the
+#' [Matrix::ngCMatrix-class] to efficiently store the
 #' transaction ID lists as a sparse matrix.  Each column in the matrix
 #' represents one transaction ID list.
 #'
@@ -63,7 +63,7 @@
 #' an object of class [transactions]. 
 #' * by calls of the form  `new("tidLists", ...)`.
 #' 
-#' @slot data an object of class [ngCMatrix-class] from package \pkg{Matrix}.
+#' @slot data an object of class [Matrix::ngCMatrix-class].
 #' @slot itemInfo a data.frame
 #' @slot transactionInfo a data.frame
 #' 

diff --git a/R/warm.R b/R/warm.R
@@ -198,7 +198,9 @@ weclat <- function(data,
     cat("\n")
   }
   ## r <- .Call(R_transpose_ngCMatrix, data@data)
-  r <- selectMethod("t", class(data@data))(data@data)
+  ##r <- selectMethod("t", class(data@data))(data@data)
+  r <- t(data@data)
+
   r <- .Call(
     R_weclat_ngCMatrix,
     r,

diff --git a/man/is.superset.Rd b/man/is.superset.Rd
diff --git a/man/itemMatrix-class.Rd b/man/itemMatrix-class.Rd
diff --git a/man/tidLists-class.Rd b/man/tidLists-class.Rd
diff --git a/src/arch64.h b/src/arch64.h
@@ -1,5 +1,5 @@
 /*----------------------------------------------------------------------
-  Detect 64 bit architecure
+  Detect 64 bit architecture
   ----------------------------------------------------------------------*/
 
 /*

diff --git a/src/arrayIndex.c b/src/arrayIndex.c
@@ -2,15 +2,49 @@
 #include <R.h>
 #include <Rinternals.h>
 
-/* workaround i18n */
-#define _(x) (x)
+/* DEPRECATED (8/20/24)
+ * This file is deprecated and will be 
+ * removed. Currently used to export functions used by arulesSequences
+ */
 
-/* 
- * copied from 2.14-2 src/main/subsript.c
- *
- * ceeboo 2012/11
- */
+// adapted from 4.4.1 src/main/match.c
+//
+// ceeboo 2024/8
+//
+static Rboolean nonNullStringMatch(SEXP s, SEXP t)
+{
+    /* "" or NA string matches nothing */
+    if (s == NA_STRING || t == NA_STRING) return FALSE;
+    if (CHAR(s)[0] && CHAR(t)[0]) {
+	if (s == t)
+	    return TRUE;
+    /* The only case where pointer comparisons do not suffice is where
+      we have two strings in different encodings (which must be
+      non-ASCII strings). Note that one of the strings could be marked
+      as unknown. */
+	if (getCharCE(s) == CE_BYTES && getCharCE(t) == CE_BYTES) 
+	    return strcmp(CHAR(s), CHAR(t)) ? FALSE : TRUE;
+	if (getCharCE(s) == CE_BYTES || getCharCE(t) == CE_BYTES)
+	    return FALSE;
+	if (getCharCE(s) == getCharCE(t))
+	    return FALSE;
+	if (getCharCE(s) == CE_NATIVE || getCharCE(t) == CE_NATIVE)
+	    return FALSE;
+        void *vmax = vmaxget();
+        int result = strcmp(translateCharUTF8(s), translateCharUTF8(t));
+        vmaxset(vmax); /* discard any memory used by translateCharUTF8 */
+        return result ? FALSE : TRUE;
+    } else
+	return FALSE;
+}
 
+// workaround i18n
+#define _(x) (x)
+
+// copied from 2.14-2 src/main/subscript.c
+//
+// ceeboo 2011/11 2014/1
+//
 #define ECALL(call, yy) if(call == R_NilValue) error(yy); else errorcall(call, yy);
 
 static SEXP nullSubscript(int n)
@@ -179,7 +213,7 @@ stringSubscript(SEXP s, int ns, int nx, SEXP names,
 		    if (!in && TYPEOF(names_j) != CHARSXP) {
 			ECALL(call, _("character vector element does not have type CHARSXP"));
 		    }
-		    if (NonNullStringMatch(STRING_ELT(s, i), names_j)) {
+		    if (nonNullStringMatch(STRING_ELT(s, i), names_j)) {
 			sub = j + 1;
 			SET_VECTOR_ELT(indexnames, i, R_NilValue);
 			break;
@@ -195,7 +229,7 @@ stringSubscript(SEXP s, int ns, int nx, SEXP names,
 	sub = INTEGER(indx)[i];
 	if (sub == 0) {
 	    for (j = 0 ; j < i ; j++)
-		if (NonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) {
+		if (nonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) {
 		    sub = INTEGER(indx)[j];
 		    SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, j));
 		    break;
@@ -224,14 +258,14 @@ stringSubscript(SEXP s, int ns, int nx, SEXP names,
 /* Array Subscripts.
     dim is the dimension (0 to k-1)
     s is the subscript list,
-    dg is the attribute name of dim
-    dng is the attribute name of dimnames
+    dn is the attribute name of dim
+    dnn is the attribute name of dimnames
     x is the array to be subscripted.
 */
 
 SEXP
-_int_arraySubscript(int dim, SEXP s, const char *dn, const char *dnn,
-		   SEXP x, Rboolean in, SEXP call)
+_int_array_subscript(int dim, SEXP s, const char *dn, const char *dnn,
+		     SEXP x, Rboolean in, SEXP call)
 {
     int nd, ns, stretch = 0;
     SEXP dnames, tmp;
@@ -270,14 +304,14 @@ _int_arraySubscript(int dim, SEXP s, const char *dn, const char *dnn,
     return R_NilValue;
 }
 
-/* R interface */
+// R interface
 SEXP
 R_arraySubscript(SEXP x, SEXP dim, SEXP s, SEXP dn, SEXP dnn) {
-    /* FIXME */
-    return _int_arraySubscript(INTEGER(dim)[0], s, 
+    // FIXME
+    return _int_array_subscript(INTEGER(dim)[0], s, 
 			      (const char *) CHAR(STRING_ELT(dn, 0)), 
 			      (const char *) CHAR(STRING_ELT(dnn, 0)), 
 			      x, TRUE, R_NilValue);
 }
 
-
+//
-Original file line number
+Diff line change
@@ Expand Up / @@ -39,9 +39,6 @@ @@
     ## support(l') >= support(l) so min(support(l' in all subsets of l)) != support(l)
     #' Find Generator Itemsets
     #'
    #' Provides the generic function and the method `is.generator()` for
@@ Expand Down @@