-
Notifications
You must be signed in to change notification settings - Fork 34
count
matrixStats: Benchmark report
This report benchmarks the performance of count() against alternative methods.
- sum(x == value)
> rvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) {
+ mode <- match.arg(mode)
+ if (mode == "logical") {
+ x <- sample(c(FALSE, TRUE), size = n, replace = TRUE)
+ } else {
+ x <- runif(n, min = range[1], max = range[2])
+ }
+ storage.mode(x) <- mode
+ if (na_prob > 0)
+ x[sample(n, size = na_prob * n)] <- NA
+ x
+ }
> rvectors <- function(scale = 10, seed = 1, ...) {
+ set.seed(seed)
+ data <- list()
+ data[[1]] <- rvector(n = scale * 100, ...)
+ data[[2]] <- rvector(n = scale * 1000, ...)
+ data[[3]] <- rvector(n = scale * 10000, ...)
+ data[[4]] <- rvector(n = scale * 1e+05, ...)
+ data[[5]] <- rvector(n = scale * 1e+06, ...)
+ names(data) <- sprintf("n = %d", sapply(data, FUN = length))
+ data
+ }
> data <- rvectors(mode = mode)
> x <- data[["n = 1000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3231175 172.6 5709258 305.0 5709258 305.0
Vcells 33430019 255.1 59837990 456.6 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on integer+n = 1000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001668 | 0.001766 | 0.0020640 | 0.0018505 | 0.0019250 | 0.021897 |
2 | sum(x == value) | 0.002571 | 0.002667 | 0.0028124 | 0.0027235 | 0.0027855 | 0.011185 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.0000000 |
2 | sum(x == value) | 1.541367 | 1.510193 | 1.362586 | 1.471764 | 1.447013 | 0.5108006 |
Figure: Benchmarking of count() and sum(x == value) on integer+n = 1000 data. Outliers are displayed as crosses. Times are in milliseconds.
> x <- data[["n = 10000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3228942 172.5 5709258 305.0 5709258 305.0
Vcells 11794763 90.0 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on integer+n = 10000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001685 | 0.001856 | 0.0022827 | 0.0020615 | 0.0023060 | 0.020644 |
2 | sum(x == value) | 0.022238 | 0.022466 | 0.0229967 | 0.0225655 | 0.0227155 | 0.031721 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.00000 | 1.00000 | 1.00000 | 1.00000 | 1.000000 | 1.000000 |
2 | sum(x == value) | 13.19763 | 12.10453 | 10.07446 | 10.94616 | 9.850607 | 1.536572 |
Figure: Benchmarking of count() and sum(x == value) on integer+n = 10000 data. Outliers are displayed as crosses. Times are in milliseconds.
> x <- data[["n = 100000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3229005 172.5 5709258 305.0 5709258 305.0
Vcells 11794805 90.0 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on integer+n = 100000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001679 | 0.0019050 | 0.0026980 | 0.002224 | 0.0028375 | 0.029385 |
2 | sum(x == value) | 0.206338 | 0.2172605 | 0.2189826 | 0.218022 | 0.2197080 | 0.244302 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.0000 | 1.0000 | 1.00000 | 1.00000 | 1.00000 | 1.000000 |
2 | sum(x == value) | 122.8934 | 114.0475 | 81.16329 | 98.03147 | 77.43013 | 8.313834 |
Figure: Benchmarking of count() and sum(x == value) on integer+n = 100000 data. Outliers are displayed as crosses. Times are in milliseconds.
> x <- data[["n = 1000000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3229068 172.5 5709258 305.0 5709258 305.0
Vcells 11795360 90.0 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on integer+n = 1000000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001668 | 0.002062 | 0.0092437 | 0.0088085 | 0.016791 | 0.03742 |
2 | sum(x == value) | 2.738796 | 3.213318 | 3.4531879 | 3.2412525 | 3.285109 | 15.69834 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.000 | 1.00 | 1.0000 | 1.0000 | 1.000 | 1.0000 |
2 | sum(x == value) | 1641.964 | 1558.35 | 373.5716 | 367.9687 | 195.647 | 419.5172 |
Figure: Benchmarking of count() and sum(x == value) on integer+n = 1000000 data. Outliers are displayed as crosses. Times are in milliseconds.
> x <- data[["n = 10000000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3229131 172.5 5709258 305.0 5709258 305.0
Vcells 11795402 90.0 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on integer+n = 10000000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001711 | 0.002137 | 0.0126709 | 0.0034835 | 0.0227455 | 0.052068 |
2 | sum(x == value) | 31.990870 | 32.284979 | 36.0097863 | 32.8202760 | 34.2459520 | 55.251011 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.00 | 1.00 | 1.000 | 1.000 | 1.000 | 1.000 |
2 | sum(x == value) | 18697.18 | 15107.62 | 2841.939 | 9421.638 | 1505.614 | 1061.132 |
Figure: Benchmarking of count() and sum(x == value) on integer+n = 10000000 data. Outliers are displayed as crosses. Times are in milliseconds.
> rvector <- function(n, mode = c("logical", "double", "integer"), range = c(-100, +100), na_prob = 0) {
+ mode <- match.arg(mode)
+ if (mode == "logical") {
+ x <- sample(c(FALSE, TRUE), size = n, replace = TRUE)
+ } else {
+ x <- runif(n, min = range[1], max = range[2])
+ }
+ storage.mode(x) <- mode
+ if (na_prob > 0)
+ x[sample(n, size = na_prob * n)] <- NA
+ x
+ }
> rvectors <- function(scale = 10, seed = 1, ...) {
+ set.seed(seed)
+ data <- list()
+ data[[1]] <- rvector(n = scale * 100, ...)
+ data[[2]] <- rvector(n = scale * 1000, ...)
+ data[[3]] <- rvector(n = scale * 10000, ...)
+ data[[4]] <- rvector(n = scale * 1e+05, ...)
+ data[[5]] <- rvector(n = scale * 1e+06, ...)
+ names(data) <- sprintf("n = %d", sapply(data, FUN = length))
+ data
+ }
> data <- rvectors(mode = mode)
> x <- data[["n = 1000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3229194 172.5 5709258 305.0 5709258 305.0
Vcells 17351273 132.4 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on double+n = 1000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001683 | 0.0017205 | 0.0023145 | 0.0018620 | 0.0020020 | 0.043659 |
2 | sum(x == value) | 0.002109 | 0.0022205 | 0.0024306 | 0.0022905 | 0.0023675 | 0.015180 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.0000000 |
2 | sum(x == value) | 1.253119 | 1.290613 | 1.050184 | 1.230129 | 1.182567 | 0.3476946 |
Figure: Benchmarking of count() and sum(x == value) on double+n = 1000 data. Outliers are displayed as crosses. Times are in milliseconds.
> x <- data[["n = 10000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3229257 172.5 5709258 305.0 5709258 305.0
Vcells 17351314 132.4 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on double+n = 10000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001677 | 0.0019015 | 0.0025086 | 0.0021490 | 0.0023955 | 0.038174 |
2 | sum(x == value) | 0.017422 | 0.0176605 | 0.0182435 | 0.0177825 | 0.0179710 | 0.028087 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.00000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.0000000 |
2 | sum(x == value) | 10.38879 | 9.287668 | 7.272254 | 8.274779 | 7.501983 | 0.7357626 |
Figure: Benchmarking of count() and sum(x == value) on double+n = 10000 data. Outliers are displayed as crosses. Times are in milliseconds.
> x <- data[["n = 100000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3229320 172.5 5709258 305.0 5709258 305.0
Vcells 17351639 132.4 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on double+n = 100000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001681 | 0.0018845 | 0.0026881 | 0.0022020 | 0.0028000 | 0.028854 |
2 | sum(x == value) | 0.167430 | 0.1688190 | 0.1710085 | 0.1694525 | 0.1707535 | 0.205628 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.00000 | 1.00000 | 1.0000 | 1.00000 | 1.00000 | 1.000000 |
2 | sum(x == value) | 99.60143 | 89.58291 | 63.6164 | 76.95391 | 60.98339 | 7.126499 |
Figure: Benchmarking of count() and sum(x == value) on double+n = 100000 data. Outliers are displayed as crosses. Times are in milliseconds.
> x <- data[["n = 1000000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3229383 172.5 5709258 305.0 5709258 305.0
Vcells 17352014 132.4 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on double+n = 1000000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.001665 | 0.0020285 | 0.0078366 | 0.007264 | 0.012295 | 0.025685 |
2 | sum(x == value) | 1.730855 | 2.7169495 | 3.0016203 | 2.744386 | 2.806246 | 14.117200 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.000 | 1.000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
2 | sum(x == value) | 1039.553 | 1339.388 | 383.0278 | 377.8064 | 228.2429 | 549.6282 |
Figure: Benchmarking of count() and sum(x == value) on double+n = 1000000 data. Outliers are displayed as crosses. Times are in milliseconds.
> x <- data[["n = 10000000"]]
> gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3229446 172.5 5709258 305.0 5709258 305.0
Vcells 17352056 132.4 38296314 292.2 57084605 435.6
> stats <- microbenchmark(count = count(x, value), `sum(x == value)` = sum(x == value), unit = "ms")
Table: Benchmarking of count() and sum(x == value) on double+n = 10000000 data. The top panel shows times in milliseconds and the bottom panel shows relative times.
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 0.00170 | 0.0021225 | 0.0117215 | 0.0030785 | 0.0215545 | 0.036833 |
2 | sum(x == value) | 27.04751 | 27.3330855 | 30.7959116 | 27.8625050 | 28.7366065 | 44.825225 |
expr | min | lq | mean | median | uq | max | |
---|---|---|---|---|---|---|---|
1 | count | 1.0 | 1.00 | 1.000 | 1.000 | 1.000 | 1.000 |
2 | sum(x == value) | 15910.3 | 12877.78 | 2627.301 | 9050.676 | 1333.207 | 1216.985 |
Figure: Benchmarking of count() and sum(x == value) on double+n = 10000000 data. Outliers are displayed as crosses. Times are in milliseconds.
R version 3.6.1 Patched (2019-08-27 r77078)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS
Matrix products: default
BLAS: /home/hb/software/R-devel/R-3-6-branch/lib/R/lib/libRblas.so
LAPACK: /home/hb/software/R-devel/R-3-6-branch/lib/R/lib/libRlapack.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] microbenchmark_1.4-6 matrixStats_0.55.0-9000 ggplot2_3.2.1
[4] knitr_1.24 R.devices_2.16.0 R.utils_2.9.0
[7] R.oo_1.22.0 R.methodsS3_1.7.1 history_0.0.0-9002
loaded via a namespace (and not attached):
[1] Biobase_2.45.0 bit64_0.9-7 splines_3.6.1
[4] network_1.15 assertthat_0.2.1 highr_0.8
[7] stats4_3.6.1 blob_1.2.0 robustbase_0.93-5
[10] pillar_1.4.2 RSQLite_2.1.2 backports_1.1.4
[13] lattice_0.20-38 glue_1.3.1 digest_0.6.20
[16] colorspace_1.4-1 sandwich_2.5-1 Matrix_1.2-17
[19] XML_3.98-1.20 lpSolve_5.6.13.3 pkgconfig_2.0.2
[22] genefilter_1.66.0 purrr_0.3.2 ergm_3.10.4
[25] xtable_1.8-4 mvtnorm_1.0-11 scales_1.0.0
[28] tibble_2.1.3 annotate_1.62.0 IRanges_2.18.2
[31] TH.data_1.0-10 withr_2.1.2 BiocGenerics_0.30.0
[34] lazyeval_0.2.2 mime_0.7 survival_2.44-1.1
[37] magrittr_1.5 crayon_1.3.4 statnet.common_4.3.0
[40] memoise_1.1.0 laeken_0.5.0 R.cache_0.13.0
[43] MASS_7.3-51.4 R.rsp_0.43.1 tools_3.6.1
[46] multcomp_1.4-10 S4Vectors_0.22.1 trust_0.1-7
[49] munsell_0.5.0 AnnotationDbi_1.46.1 compiler_3.6.1
[52] rlang_0.4.0 grid_3.6.1 RCurl_1.95-4.12
[55] cwhmisc_6.6 rappdirs_0.3.1 labeling_0.3
[58] bitops_1.0-6 base64enc_0.1-3 boot_1.3-23
[61] gtable_0.3.0 codetools_0.2-16 DBI_1.0.0
[64] markdown_1.1 R6_2.4.0 zoo_1.8-6
[67] dplyr_0.8.3 bit_1.1-14 zeallot_0.1.0
[70] parallel_3.6.1 Rcpp_1.0.2 vctrs_0.2.0
[73] DEoptimR_1.0-8 tidyselect_0.2.5 xfun_0.9
[76] coda_0.19-3
Total processing time was 19.52 secs.
To reproduce this report, do:
html <- matrixStats:::benchmark('count')
Copyright Henrik Bengtsson. Last updated on 2019-09-10 20:57:56 (-0700 UTC). Powered by RSP.
<script> var link = document.createElement('link'); link.rel = 'icon'; link.href = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAA21BMVEUAAAAAAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8AAP8BAf4CAv0DA/wdHeIeHuEfH+AgIN8hId4lJdomJtknJ9g+PsE/P8BAQL9yco10dIt1dYp3d4h4eIeVlWqWlmmXl2iYmGeZmWabm2Tn5xjo6Bfp6Rb39wj4+Af//wA2M9hbAAAASXRSTlMAAQIJCgsMJSYnKD4/QGRlZmhpamtsbautrrCxuru8y8zN5ebn6Pn6+///////////////////////////////////////////LsUNcQAAAS9JREFUOI29k21XgkAQhVcFytdSMqMETU26UVqGmpaiFbL//xc1cAhhwVNf6n5i5z67M2dmYOyfJZUqlVLhkKucG7cgmUZTybDz6g0iDeq51PUr37Ds2cy2/C9NeES5puDjxuUk1xnToZsg8pfA3avHQ3lLIi7iWRrkv/OYtkScxBIMgDee0ALoyxHQBJ68JLCjOtQIMIANF7QG9G9fNnHvisCHBVMKgSJgiz7nE+AoBKrAPA3MgepvgR9TSCasrCKH0eB1wBGBFdCO+nAGjMVGPcQb5bd6mQRegN6+1axOs9nGfYcCtfi4NQosdtH7dB+txFIpXQqN1p9B/asRHToyS0jRgpV7nk4nwcq1BJ+x3Gl/v7S9Wmpp/aGquum7w3ZDyrADFYrl8vHBH+ev9AUASW1dmU4h4wAAAABJRU5ErkJggg==" document.getElementsByTagName('head')[0].appendChild(link); </script>