From e48d50f0a0f127011f84d867bf93bbf815fe4868 Mon Sep 17 00:00:00 2001 From: schochastics Date: Fri, 17 Jan 2025 13:27:56 +0100 Subject: [PATCH 01/14] faster adjacency matrix quering --- R/indexing.R | 177 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 151 insertions(+), 26 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index 83716da5ce..a17607dc85 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -1,4 +1,3 @@ - ## IGraph library. ## Copyright (C) 2010-2012 Gabor Csardi ## 334 Harvard street, Cambridge, MA 02139 USA @@ -54,6 +53,139 @@ # - G[1:3,2,eid=TRUE] # create an edge sequence +get_partial_adjacency <- function(x, indices, mode, attr = NULL, sparse = TRUE) { + all_vertices <- seq_len(vcount(x)) + + n_row <- if (mode == "out") { + length(indices) + } else { + length(all_vertices) + } + n_col <- if (mode == "out") { + length(all_vertices) + } else { + length(indices) + } + + adj <- adjacent_vertices(x, indices, mode = mode) + + main_indices <- rep(seq_along(adj), sapply(adj, length)) + other_indices <- unlist(adj) + + el <- if (mode == "out") { + cbind(main_indices, other_indices) + } else { + cbind(other_indices, main_indices) + } + + values <- if (is.null(attr)) { + 1 + } else { + main_vertices <- rep(indices, sapply(adj, length)) + if (mode == "out") { + eids <- get_edge_ids(x, c(rbind(main_vertices, other_indices))) + } else { + eids <- get_edge_ids(x, c(rbind(other_indices, main_vertices))) + } + edge_attr(x, attr, eids[eids != 0]) + } + + if (sparse) { + res <- Matrix::sparseMatrix( + i = el[, 1], + j = el[, 2], + x = values, + dims = c(n_row, n_col) + ) + } else { + res <- matrix(0, nrow = n_row, ncol = n_col) + res[el] <- values + } + if ("name" %in% vertex_attr_names(x)) { + main_names <- vertex_attr(x, "name", indices) + other_names <- vertex_attr(x, "name") + if (mode == "out") { + rownames(res) <- main_names + colnames(res) <- other_names + } else { + rownames(res) <- other_names + colnames(res) <- main_names + } + } + res +} + +get_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { + unique_i <- unique(i) + unique_j <- unique(j) + + i_map <- match(i, unique_i) + j_map <- match(j, unique_j) + + n_row <- length(unique_i) + n_col <- length(unique_j) + + # Create edge list for the unique i, j pairs + el <- expand.grid(unique_i, unique_j) + + # check which edges exist + eids <- get_edge_ids(x, c(rbind(el[, 1], el[, 2]))) + + row_indices <- el[eids != 0, 1] + col_indices <- el[eids != 0, 2] + eids <- eids[eids != 0] + + values <- if (is.null(attr)) { + 1 + } else { + edge_attr(x, attr, eids) + } + + if (sparse) { + unique_res <- Matrix::sparseMatrix( + i = match(row_indices, unique_i), + j = match(col_indices, unique_j), + x = values, + dims = c(n_row, n_col) + ) + } else { + unique_res <- matrix(0, nrow = n_row, ncol = n_col) + unique_res[cbind(match(row_indices, unique_i), match(col_indices, unique_j))] <- values + } + + # Expand the unique result to match duplicated i and j + if (sparse) { + res <- unique_res[i_map, j_map, drop = TRUE] + } else { + res <- unique_res[i_map, j_map] + } + + if ("name" %in% vertex_attr_names(x) && !is.null(dim(res))) { + rownames(res) <- vertex_attr(x, "name", i) + colnames(res) <- vertex_attr(x, "name", j) + } + + + res +} + +clean_indices <- function(x, indices) { + if (is.character(indices)) { + return(match(indices, V(x)$name)) + } + if (is.logical(indices)) { + if (length(indices) != vcount(x)) { + indices <- which(rep(indices, length.out = vcount(x))) + } else { + indices <- which(indices) + } + return(indices) + } + if (all(indices < 0)) { + return(seq_len(vcount(x))[indices]) + } + indices +} #' Query and manipulate a graph as it were an adjacency matrix #' @@ -156,8 +288,6 @@ sparse = igraph_opt("sparsematrices"), edges = FALSE, drop = TRUE, attr = if (is_weighted(x)) "weight" else NULL) { - ## TODO: make it faster, don't need the whole matrix usually - ################################################################ ## Argument checks if ((!missing(from) || !missing(to)) && @@ -193,31 +323,26 @@ } else { res <- as.logical(res) + 0 } - res - } else if (missing(i) && missing(j)) { - if (missing(edges)) { - as_adjacency_matrix(x, sparse = sparse, attr = attr) - } else { - as_adjacency_matrix(x, sparse = sparse, attr = attr, edges = edges) - } - } else if (missing(j)) { - if (missing(edges)) { - as_adjacency_matrix(x, sparse = sparse, attr = attr)[i, , drop = drop] - } else { - as_adjacency_matrix(x, sparse = sparse, attr = attr, edges = edges)[i, , drop = drop] - } + return(res) + } + + # convert logical, character or negative i/j to proper vertex ids + if (!missing(i)) { + i <- clean_indices(x, i) + } + if (!missing(j)) { + j <- clean_indices(x, j) + } + + if (missing(i) && missing(j)) { + return(as_adjacency_matrix(x, sparse = sparse, attr = attr)) + } + if (missing(j)) { + get_partial_adjacency(x, indices = i, mode = "out", attr = attr, sparse = sparse) } else if (missing(i)) { - if (missing(edges)) { - as_adjacency_matrix(x, sparse = sparse, attr = attr)[, j, drop = drop] - } else { - as_adjacency_matrix(x, sparse = sparse, attr = attr, edges = edges)[, j, drop = drop] - } + get_partial_adjacency(x, indices = j, mode = "in", attr = attr, sparse = sparse) } else { - if (missing(edges)) { - as_adjacency_matrix(x, sparse = sparse, attr = attr)[i, j, drop = drop] - } else { - as_adjacency_matrix(x, sparse = sparse, attr = attr, edges = edges)[i, j, drop = drop] - } + get_submatrix(x, i, j, attr = attr, sparse = sparse) } } From e5e828b3f1d5571e59e4d2b35bab4cd19cfe3531 Mon Sep 17 00:00:00 2001 From: schochastics Date: Fri, 17 Jan 2025 19:46:41 +0100 Subject: [PATCH 02/14] replaced clean_indices with as_igraph_vs --- R/indexing.R | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index a17607dc85..0bb7962a84 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -169,24 +169,6 @@ get_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { res } -clean_indices <- function(x, indices) { - if (is.character(indices)) { - return(match(indices, V(x)$name)) - } - if (is.logical(indices)) { - if (length(indices) != vcount(x)) { - indices <- which(rep(indices, length.out = vcount(x))) - } else { - indices <- which(indices) - } - return(indices) - } - if (all(indices < 0)) { - return(seq_len(vcount(x))[indices]) - } - indices -} - #' Query and manipulate a graph as it were an adjacency matrix #' #' @details @@ -328,10 +310,10 @@ clean_indices <- function(x, indices) { # convert logical, character or negative i/j to proper vertex ids if (!missing(i)) { - i <- clean_indices(x, i) + i <- as_igraph_vs(x, i) } if (!missing(j)) { - j <- clean_indices(x, j) + j <- as_igraph_vs(x, j) } if (missing(i) && missing(j)) { From 44561c553f70fe0fec8a340ab3de1609937def39 Mon Sep 17 00:00:00 2001 From: schochastics Date: Sat, 18 Jan 2025 13:19:57 +0100 Subject: [PATCH 03/14] unify helper function for querying --- R/indexing.R | 110 ++++++++++++--------------------------------------- 1 file changed, 26 insertions(+), 84 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index 0bb7962a84..2b27f986ea 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -53,111 +53,53 @@ # - G[1:3,2,eid=TRUE] # create an edge sequence -get_partial_adjacency <- function(x, indices, mode, attr = NULL, sparse = TRUE) { - all_vertices <- seq_len(vcount(x)) +get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = TRUE) { + # If i or j is NULL, assume all nodes + if (is.null(i)) i <- seq_len(vcount(x)) + if (is.null(j)) j <- seq_len(vcount(x)) - n_row <- if (mode == "out") { - length(indices) - } else { - length(all_vertices) - } - n_col <- if (mode == "out") { - length(all_vertices) - } else { - length(indices) - } - - adj <- adjacent_vertices(x, indices, mode = mode) - - main_indices <- rep(seq_along(adj), sapply(adj, length)) - other_indices <- unlist(adj) - - el <- if (mode == "out") { - cbind(main_indices, other_indices) - } else { - cbind(other_indices, main_indices) - } - - values <- if (is.null(attr)) { - 1 - } else { - main_vertices <- rep(indices, sapply(adj, length)) - if (mode == "out") { - eids <- get_edge_ids(x, c(rbind(main_vertices, other_indices))) - } else { - eids <- get_edge_ids(x, c(rbind(other_indices, main_vertices))) - } - edge_attr(x, attr, eids[eids != 0]) - } - - if (sparse) { - res <- Matrix::sparseMatrix( - i = el[, 1], - j = el[, 2], - x = values, - dims = c(n_row, n_col) - ) - } else { - res <- matrix(0, nrow = n_row, ncol = n_col) - res[el] <- values - } - if ("name" %in% vertex_attr_names(x)) { - main_names <- vertex_attr(x, "name", indices) - other_names <- vertex_attr(x, "name") - if (mode == "out") { - rownames(res) <- main_names - colnames(res) <- other_names - } else { - rownames(res) <- other_names - colnames(res) <- main_names - } - } - res -} - -get_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { + # Handle duplicates unique_i <- unique(i) unique_j <- unique(j) + # Create mapping between to unique values i_map <- match(i, unique_i) j_map <- match(j, unique_j) - n_row <- length(unique_i) - n_col <- length(unique_j) - - # Create edge list for the unique i, j pairs - el <- expand.grid(unique_i, unique_j) + adj <- adjacent_vertices(x, unique_i, mode = "out") - # check which edges exist - eids <- get_edge_ids(x, c(rbind(el[, 1], el[, 2]))) + edge_list <- cbind(rep(unique_i, sapply(adj, length)), unlist(adj)) + edge_list <- edge_list[edge_list[, 2] %in% unique_j, , drop = FALSE] - row_indices <- el[eids != 0, 1] - col_indices <- el[eids != 0, 2] - eids <- eids[eids != 0] + row_indices <- edge_list[, 1] + col_indices <- edge_list[, 2] values <- if (is.null(attr)) { 1 } else { - edge_attr(x, attr, eids) + # get edge ids to locate edge attribute values + valid_edges <- get_edge_ids(x, c(t(edge_list))) + edge_attr(x, attr, valid_edges) } + # Construct sparse or dense result matrix if (sparse) { unique_res <- Matrix::sparseMatrix( i = match(row_indices, unique_i), j = match(col_indices, unique_j), x = values, - dims = c(n_row, n_col) + dims = c(length(unique_i), length(unique_j)) ) } else { - unique_res <- matrix(0, nrow = n_row, ncol = n_col) + unique_res <- matrix(0, nrow = length(unique_i), ncol = length(unique_j)) unique_res[cbind(match(row_indices, unique_i), match(col_indices, unique_j))] <- values } - # Expand the unique result to match duplicated i and j - if (sparse) { - res <- unique_res[i_map, j_map, drop = TRUE] + # Expand to handle duplicated entries in i and j + res <- if (sparse) { + unique_res[i_map, j_map, drop = TRUE] } else { - res <- unique_res[i_map, j_map] + unique_res[i_map, j_map] } if ("name" %in% vertex_attr_names(x) && !is.null(dim(res))) { @@ -165,10 +107,10 @@ get_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { colnames(res) <- vertex_attr(x, "name", j) } - - res + return(res) } + #' Query and manipulate a graph as it were an adjacency matrix #' #' @details @@ -320,11 +262,11 @@ get_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { return(as_adjacency_matrix(x, sparse = sparse, attr = attr)) } if (missing(j)) { - get_partial_adjacency(x, indices = i, mode = "out", attr = attr, sparse = sparse) + get_adjacency_submatrix(x, i = i, j = NULL, attr = attr, sparse = sparse) } else if (missing(i)) { - get_partial_adjacency(x, indices = j, mode = "in", attr = attr, sparse = sparse) + get_adjacency_submatrix(x, i = NULL, j = j, attr = attr, sparse = sparse) } else { - get_submatrix(x, i, j, attr = attr, sparse = sparse) + get_adjacency_submatrix(x, i = i, j = j, attr = attr, sparse = sparse) } } From ff45667539319abe8f21644910db931546431912 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 22 Jan 2025 11:39:26 +0100 Subject: [PATCH 04/14] refactor handling of unique and edge_list creation --- R/indexing.R | 63 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index 2b27f986ea..2e0adbae55 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -55,21 +55,28 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = TRUE) { # If i or j is NULL, assume all nodes - if (is.null(i)) i <- seq_len(vcount(x)) - if (is.null(j)) j <- seq_len(vcount(x)) - - # Handle duplicates - unique_i <- unique(i) - unique_j <- unique(j) + #if not NULL make sure to handle duplicates correctly + if (is.null(i)) { + i <- i_unique <- i_map <- seq_len(vcount(x)) + } else { + i_unique <- unique(i) + i_map <- match(i, i_unique) + } - # Create mapping between to unique values - i_map <- match(i, unique_i) - j_map <- match(j, unique_j) + if (is.null(j)) { + j <- j_unique <- j_map <- seq_len(vcount(x)) + } else { + j_unique <- unique(j) + j_map <- match(j, j_unique) + } - adj <- adjacent_vertices(x, unique_i, mode = "out") + adj <- adjacent_vertices(x, i_unique, mode = "out") + i_degree <- purrr::map_int(adj, length) - edge_list <- cbind(rep(unique_i, sapply(adj, length)), unlist(adj)) - edge_list <- edge_list[edge_list[, 2] %in% unique_j, , drop = FALSE] + from_id <- rep(i_unique, i_degree) + to_id <- unlist(adj) + edge_list <- cbind(from_id, to_id) + edge_list <- edge_list[edge_list[, 2] %in% j_unique, , drop = FALSE] row_indices <- edge_list[, 1] col_indices <- edge_list[, 2] @@ -85,14 +92,16 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = # Construct sparse or dense result matrix if (sparse) { unique_res <- Matrix::sparseMatrix( - i = match(row_indices, unique_i), - j = match(col_indices, unique_j), + i = match(row_indices, i_unique), + j = match(col_indices, j_unique), x = values, - dims = c(length(unique_i), length(unique_j)) + dims = c(length(i_unique), length(j_unique)) ) } else { - unique_res <- matrix(0, nrow = length(unique_i), ncol = length(unique_j)) - unique_res[cbind(match(row_indices, unique_i), match(col_indices, unique_j))] <- values + unique_res <- matrix(0, nrow = length(i_unique), ncol = length(j_unique)) + unique_res[ + cbind(match(row_indices, i_unique), match(col_indices, j_unique)) + ] <- values } # Expand to handle duplicated entries in i and j @@ -209,7 +218,7 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = #' @method [ igraph #' @export `[.igraph` <- function(x, i, j, ..., from, to, - sparse = igraph_opt("sparsematrices"), + sparse = igraph_opt("sparsematrices"), edges = FALSE, drop = TRUE, attr = if (is_weighted(x)) "weight" else NULL) { ################################################################ @@ -382,7 +391,7 @@ length.igraph <- function(x) { #' @family functions for manipulating graph structure #' @export `[<-.igraph` <- function(x, i, j, ..., from, to, - attr = if (is_weighted(x)) "weight" else NULL, + attr = if (is_weighted(x)) "weight" else NULL, value) { ## TODO: rewrite this in C to make it faster @@ -419,7 +428,7 @@ length.igraph <- function(x) { if (!missing(from)) { if (is.null(value) || - (is.logical(value) && !value) || + (is.logical(value) && !value) || (is.null(attr) && is.numeric(value) && value == 0)) { ## Delete edges todel <- x[from = from, to = to, ..., edges = TRUE] @@ -436,7 +445,7 @@ length.igraph <- function(x) { } } } else if (is.null(value) || - (is.logical(value) && !value) || + (is.logical(value) && !value) || (is.null(attr) && is.numeric(value) && value == 0)) { ## Delete edges if (missing(i) && missing(j)) { @@ -458,12 +467,12 @@ length.igraph <- function(x) { exe <- lapply(x[[i, j, ..., edges = TRUE]], as.vector) exv <- lapply(x[[i, j, ...]], as.vector) toadd <- unlist(lapply(seq_along(exv), function(idx) { - to <- setdiff(j, exv[[idx]]) - if (length(to != 0)) { - rbind(i[idx], setdiff(j, exv[[idx]])) - } else { - numeric() - } + to <- setdiff(j, exv[[idx]]) + if (length(to != 0)) { + rbind(i[idx], setdiff(j, exv[[idx]])) + } else { + numeric() + } })) ## Do the changes if (is.null(attr)) { From f6912f86a70f406e0810e19003035bacbbc6a496 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 22 Jan 2025 11:55:13 +0100 Subject: [PATCH 05/14] added tests --- tests/testthat/test-indexing.R | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/testthat/test-indexing.R b/tests/testthat/test-indexing.R index db4f36d26c..cad77c4136 100644 --- a/tests/testthat/test-indexing.R +++ b/tests/testthat/test-indexing.R @@ -294,3 +294,31 @@ test_that("[[ handles from and to properly even if the graph has conflicting ver expect_true(is_igraph_vs(g[[1:3, 2:6]][[1]])) expect_true(is_igraph_vs(g[[from = 1:3, to = 2:6]][[1]])) }) + +test_that("[ handles errors in input parameters well", { + g <- make_full_graph(10) + expect_error(g[from = 1, to = 1, i = 1, j = 1]) + expect_error(g[from = 1]) + expect_error(g[to = 1]) + expect_error(g[from = NA, to = 2]) + expect_error(g[from = 1, to = NA]) + expect_error(g[from = 1, to = c(1, 2)]) +}) + +test_that("[ handles all combinations of i and/or j", { + A <- matrix( + rep( + c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), + c( + 4L, 1L, 4L, 1L, 2L, 1L, 5L, 2L, 3L, 1L, 10L, 3L, 9L, 1L, 1L, 1L, 3L, 1L, 1L, + 1L, 1L, 1L, 10L, 1L, 1L, 1L, 1L, 5L, 11L, 1L, 2L, 1L, 5L, 1L, 3L + ) + ), + nrow = 10L, + ncol = 10L + ) + g <- graph_from_adjacency_matrix(A, "directed") + expect_equal(canonicalize_matrix(g[1:3, ]), A[1:3, ]) + expect_equal(canonicalize_matrix(g[, 4:7]), A[, 4:7]) + expect_equal(canonicalize_matrix(g[1:3, 4:7]), A[1:3, 4:7]) +}) From 56ba6e9d8d69df0dac2b3fc7fa7ea1431769ca73 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 22 Jan 2025 12:04:30 +0100 Subject: [PATCH 06/14] remove purrr --- R/indexing.R | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index 2e0adbae55..3e7ec88896 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -55,7 +55,7 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = TRUE) { # If i or j is NULL, assume all nodes - #if not NULL make sure to handle duplicates correctly + # if not NULL make sure to handle duplicates correctly if (is.null(i)) { i <- i_unique <- i_map <- seq_len(vcount(x)) } else { @@ -71,7 +71,7 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = } adj <- adjacent_vertices(x, i_unique, mode = "out") - i_degree <- purrr::map_int(adj, length) + i_degree <- map_int(adj, length) from_id <- rep(i_unique, i_degree) to_id <- unlist(adj) @@ -218,9 +218,9 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = #' @method [ igraph #' @export `[.igraph` <- function(x, i, j, ..., from, to, - sparse = igraph_opt("sparsematrices"), - edges = FALSE, drop = TRUE, - attr = if (is_weighted(x)) "weight" else NULL) { + sparse = igraph_opt("sparsematrices"), + edges = FALSE, drop = TRUE, + attr = if (is_weighted(x)) "weight" else NULL) { ################################################################ ## Argument checks if ((!missing(from) || !missing(to)) && @@ -391,8 +391,8 @@ length.igraph <- function(x) { #' @family functions for manipulating graph structure #' @export `[<-.igraph` <- function(x, i, j, ..., from, to, - attr = if (is_weighted(x)) "weight" else NULL, - value) { + attr = if (is_weighted(x)) "weight" else NULL, + value) { ## TODO: rewrite this in C to make it faster ################################################################ @@ -428,7 +428,7 @@ length.igraph <- function(x) { if (!missing(from)) { if (is.null(value) || - (is.logical(value) && !value) || + (is.logical(value) && !value) || (is.null(attr) && is.numeric(value) && value == 0)) { ## Delete edges todel <- x[from = from, to = to, ..., edges = TRUE] @@ -445,7 +445,7 @@ length.igraph <- function(x) { } } } else if (is.null(value) || - (is.logical(value) && !value) || + (is.logical(value) && !value) || (is.null(attr) && is.numeric(value) && value == 0)) { ## Delete edges if (missing(i) && missing(j)) { @@ -467,12 +467,12 @@ length.igraph <- function(x) { exe <- lapply(x[[i, j, ..., edges = TRUE]], as.vector) exv <- lapply(x[[i, j, ...]], as.vector) toadd <- unlist(lapply(seq_along(exv), function(idx) { - to <- setdiff(j, exv[[idx]]) - if (length(to != 0)) { - rbind(i[idx], setdiff(j, exv[[idx]])) - } else { - numeric() - } + to <- setdiff(j, exv[[idx]]) + if (length(to != 0)) { + rbind(i[idx], setdiff(j, exv[[idx]])) + } else { + numeric() + } })) ## Do the changes if (is.null(attr)) { From ece42d0de3e303d0d15694f49fbc69ee1693f3c9 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 22 Jan 2025 15:19:04 +0100 Subject: [PATCH 07/14] added tests for duplicated i/j --- tests/testthat/test-indexing.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/testthat/test-indexing.R b/tests/testthat/test-indexing.R index cad77c4136..370549fcae 100644 --- a/tests/testthat/test-indexing.R +++ b/tests/testthat/test-indexing.R @@ -322,3 +322,21 @@ test_that("[ handles all combinations of i and/or j", { expect_equal(canonicalize_matrix(g[, 4:7]), A[, 4:7]) expect_equal(canonicalize_matrix(g[1:3, 4:7]), A[1:3, 4:7]) }) + +test_that("[ handles duplicated i/j well", { + A <- matrix( + rep( + c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), + c( + 4L, 1L, 4L, 1L, 2L, 1L, 5L, 2L, 3L, 1L, 10L, 3L, 9L, 1L, 1L, 1L, 3L, 1L, 1L, + 1L, 1L, 1L, 10L, 1L, 1L, 1L, 1L, 5L, 11L, 1L, 2L, 1L, 5L, 1L, 3L + ) + ), + nrow = 10L, + ncol = 10L + ) + g <- graph_from_adjacency_matrix(A, "directed") + expect_equal(canonicalize_matrix(g[c(1, 2, 2), ]), A[c(1, 2, 2), ]) + expect_equal(canonicalize_matrix(g[, c(3, 3, 4, 4)]), A[, c(3, 3, 4, 4)]) + expect_equal(canonicalize_matrix(g[c(1, 2, 2), c(3, 3, 4, 4)]), A[c(1, 2, 2), c(3, 3, 4, 4)]) +}) From 722762674e1debcf9e5c3de795bf9f8ca704e282 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 23 Jan 2025 21:45:31 +0100 Subject: [PATCH 08/14] make dense case as.matrix(sparse) --- R/indexing.R | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index 3e7ec88896..98b2e98ce4 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -75,6 +75,7 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = from_id <- rep(i_unique, i_degree) to_id <- unlist(adj) + edge_list <- cbind(from_id, to_id) edge_list <- edge_list[edge_list[, 2] %in% j_unique, , drop = FALSE] @@ -89,26 +90,19 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = edge_attr(x, attr, valid_edges) } - # Construct sparse or dense result matrix - if (sparse) { - unique_res <- Matrix::sparseMatrix( - i = match(row_indices, i_unique), - j = match(col_indices, j_unique), - x = values, - dims = c(length(i_unique), length(j_unique)) - ) - } else { - unique_res <- matrix(0, nrow = length(i_unique), ncol = length(j_unique)) - unique_res[ - cbind(match(row_indices, i_unique), match(col_indices, j_unique)) - ] <- values - } + + unique_res <- Matrix::sparseMatrix( + i = match(row_indices, i_unique), + j = match(col_indices, j_unique), + x = values, + dims = c(length(i_unique), length(j_unique)) + ) # Expand to handle duplicated entries in i and j - res <- if (sparse) { - unique_res[i_map, j_map, drop = TRUE] - } else { - unique_res[i_map, j_map] + res <- unique_res[i_map, j_map, drop = TRUE] + + if (!sparse) { + res <- as.matrix(res) } if ("name" %in% vertex_attr_names(x) && !is.null(dim(res))) { From e5baaa5a7ffdafab1833568e3c0398db591ec1c9 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 30 Jan 2025 21:39:53 +0100 Subject: [PATCH 09/14] pulled duplication handling out of sumatrix function --- R/indexing.R | 74 +++++++++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index 423dfa3318..a879d7bcbd 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -53,31 +53,24 @@ # - G[1:3,2,eid=TRUE] # create an edge sequence -get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = TRUE) { +get_adjacency_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { # If i or j is NULL, assume all nodes # if not NULL make sure to handle duplicates correctly - if (is.null(i)) { - i <- i_unique <- i_map <- seq_len(vcount(x)) - } else { - i_unique <- unique(i) - i_map <- match(i, i_unique) + if (missing(i)) { + i <- seq_len(vcount(x)) } - - if (is.null(j)) { - j <- j_unique <- j_map <- seq_len(vcount(x)) - } else { - j_unique <- unique(j) - j_map <- match(j, j_unique) + if (missing(j)) { + j <- seq_len(vcount(x)) } - adj <- adjacent_vertices(x, i_unique, mode = "out") + adj <- adjacent_vertices(x, i, mode = "out") i_degree <- map_int(adj, length) - from_id <- rep(i_unique, i_degree) + from_id <- rep(i, i_degree) to_id <- unlist(adj) edge_list <- cbind(from_id, to_id) - edge_list <- edge_list[edge_list[, 2] %in% j_unique, , drop = FALSE] + edge_list <- edge_list[edge_list[, 2] %in% j, , drop = FALSE] row_indices <- edge_list[, 1] col_indices <- edge_list[, 2] @@ -85,22 +78,18 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = values <- if (is.null(attr)) { 1 } else { - # get edge ids to locate edge attribute values valid_edges <- get_edge_ids(x, c(t(edge_list))) edge_attr(x, attr, valid_edges) } - unique_res <- Matrix::sparseMatrix( - i = match(row_indices, i_unique), - j = match(col_indices, j_unique), + res <- Matrix::sparseMatrix( + i = match(row_indices, i), + j = match(col_indices, j), x = values, - dims = c(length(i_unique), length(j_unique)) + dims = c(length(i), length(j)) ) - # Expand to handle duplicated entries in i and j - res <- unique_res[i_map, j_map, drop = TRUE] - if (!sparse) { res <- as.matrix(res) } @@ -110,7 +99,7 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = colnames(res) <- vertex_attr(x, "name", j) } - return(res) + return(res[, , drop = FALSE]) } @@ -211,7 +200,8 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = #' #' @method [ igraph #' @export -`[.igraph` <- function(x, i, j, ..., from, to, +`[.igraph` <- function( + x, i, j, ..., from, to, sparse = igraph_opt("sparsematrices"), edges = FALSE, drop = TRUE, attr = if (is_weighted(x)) "weight" else NULL) { @@ -253,24 +243,42 @@ get_adjacency_submatrix <- function(x, i = NULL, j = NULL, attr = NULL, sparse = return(res) } + if (missing(i) && missing(j)) { + return(as_adjacency_matrix(x, sparse = sparse, attr = attr)) + } + # convert logical, character or negative i/j to proper vertex ids + # also check if any vertex is duplicated and record a mapping + i_has_dupes <- FALSE + j_has_dupes <- FALSE + if (!missing(i)) { i <- as_igraph_vs(x, i) + if (anyDuplicated(i)) { + i_has_dupes <- TRUE + i_dupl <- i + i <- unique(i) + i_map <- match(i_dupl, i) + } } if (!missing(j)) { j <- as_igraph_vs(x, j) + if (anyDuplicated(j)) { + j_has_dupes <- TRUE + j_dupl <- j + j <- unique(j) + j_map <- match(j_dupl, j) + } } - if (missing(i) && missing(j)) { - return(as_adjacency_matrix(x, sparse = sparse, attr = attr)) + sub_adjmat <- get_adjacency_submatrix(x, i = i, j = j, attr = attr, sparse = sparse) + if (i_has_dupes) { + sub_adjmat <- sub_adjmat[i_map, , drop = FALSE] } - if (missing(j)) { - get_adjacency_submatrix(x, i = i, j = NULL, attr = attr, sparse = sparse) - } else if (missing(i)) { - get_adjacency_submatrix(x, i = NULL, j = j, attr = attr, sparse = sparse) - } else { - get_adjacency_submatrix(x, i = i, j = j, attr = attr, sparse = sparse) + if (j_has_dupes) { + sub_adjmat <- sub_adjmat[, j_map, drop = FALSE] } + sub_adjmat[, , drop = drop] } #' Query and manipulate a graph as it were an adjacency list From 39348a18954115002f2e86ec14cb1d07acc14652 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 30 Jan 2025 21:54:55 +0100 Subject: [PATCH 10/14] adjusted for new get_edge_ids() --- R/indexing.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index a879d7bcbd..83303af54c 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -69,16 +69,16 @@ get_adjacency_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { from_id <- rep(i, i_degree) to_id <- unlist(adj) - edge_list <- cbind(from_id, to_id) - edge_list <- edge_list[edge_list[, 2] %in% j, , drop = FALSE] + edge_list <- data.frame(from = as.integer(from_id), to = as.integer(to_id)) + edge_list <- edge_list[edge_list$to %in% j, ] - row_indices <- edge_list[, 1] - col_indices <- edge_list[, 2] + row_indices <- edge_list[[1]] + col_indices <- edge_list[[2]] values <- if (is.null(attr)) { 1 } else { - valid_edges <- get_edge_ids(x, c(t(edge_list))) + valid_edges <- get_edge_ids(x, edge_list) edge_attr(x, attr, valid_edges) } @@ -230,7 +230,7 @@ get_adjacency_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { ################################################################## if (!missing(from)) { - res <- get_edge_ids(x, rbind(from, to), error = FALSE) + res <- get_edge_ids(x, data.frame(from, to), error = FALSE) if (edges) { ## nop } else if (!is.null(attr)) { From a1f1409d78a49d75647683c38f533f8d49cbf091 Mon Sep 17 00:00:00 2001 From: schochastics Date: Sun, 2 Feb 2025 20:43:18 +0100 Subject: [PATCH 11/14] added skip for old Matrix versions in tests --- tests/testthat/test-indexing.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/testthat/test-indexing.R b/tests/testthat/test-indexing.R index 370549fcae..6953111735 100644 --- a/tests/testthat/test-indexing.R +++ b/tests/testthat/test-indexing.R @@ -1,4 +1,5 @@ test_that("[ indexing works", { + skip_if_not_installed("Matrix", minimum_version = "1.6.0") g <- make_tree(20) ## Are these vertices connected? expect_equal(g[1, 2], 1) @@ -9,6 +10,7 @@ test_that("[ indexing works", { }) test_that("[ indexing works with symbolic names", { + skip_if_not_installed("Matrix", minimum_version = "1.6.0") g <- make_test_named_tree() expect_equal(g["a", "b"], 1) @@ -28,6 +30,7 @@ test_that("[ indexing works with symbolic names", { }) test_that("[ indexing works with logical vectors", { + skip_if_not_installed("Matrix", minimum_version = "1.6.0") g <- make_test_named_tree() lres <- structure( @@ -70,6 +73,7 @@ test_that("[ indexing works with negative indices", { }) test_that("[ indexing works with weighted graphs", { + skip_if_not_installed("Matrix", minimum_version = "1.6.0") g <- make_test_weighted_tree() expect_equal(g[1, 2], 2) From 2c6b0706dbbfb82075e25af42c8ef236f1e02b02 Mon Sep 17 00:00:00 2001 From: schochastics Date: Tue, 4 Feb 2025 10:38:43 +0100 Subject: [PATCH 12/14] added Matrix version requirement --- tests/testthat/test-indexing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-indexing.R b/tests/testthat/test-indexing.R index 6953111735..2bc8a2894f 100644 --- a/tests/testthat/test-indexing.R +++ b/tests/testthat/test-indexing.R @@ -87,6 +87,7 @@ test_that("[ indexing works with weighted graphs", { }) test_that("[ indexing works with weighted graphs and symbolic names", { + skip_if_not_installed("Matrix", minimum_version = "1.6.0") g <- make_test_weighted_tree() expect_equal(g["a", "b"], 2) From 4113e8b2e4e4329e9b9e80e7fea0a39e73a53448 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 6 Feb 2025 14:14:17 +0100 Subject: [PATCH 13/14] removed sparse parameter from subroutine and droped its drop argument --- R/indexing.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index 83303af54c..bd3e7e58e5 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -53,7 +53,7 @@ # - G[1:3,2,eid=TRUE] # create an edge sequence -get_adjacency_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { +get_adjacency_submatrix <- function(x, i, j, attr = NULL) { # If i or j is NULL, assume all nodes # if not NULL make sure to handle duplicates correctly if (missing(i)) { @@ -90,16 +90,12 @@ get_adjacency_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { dims = c(length(i), length(j)) ) - if (!sparse) { - res <- as.matrix(res) - } - if ("name" %in% vertex_attr_names(x) && !is.null(dim(res))) { rownames(res) <- vertex_attr(x, "name", i) colnames(res) <- vertex_attr(x, "name", j) } - return(res[, , drop = FALSE]) + res } @@ -271,14 +267,20 @@ get_adjacency_submatrix <- function(x, i, j, attr = NULL, sparse = TRUE) { } } - sub_adjmat <- get_adjacency_submatrix(x, i = i, j = j, attr = attr, sparse = sparse) + sub_adjmat <- get_adjacency_submatrix(x, i = i, j = j, attr = attr) if (i_has_dupes) { sub_adjmat <- sub_adjmat[i_map, , drop = FALSE] } if (j_has_dupes) { sub_adjmat <- sub_adjmat[, j_map, drop = FALSE] } - sub_adjmat[, , drop = drop] + + if (!sparse) { + as.matrix(sub_adjmat[, , drop = drop]) + } else{ + sub_adjmat[, , drop = drop] + } + } #' Query and manipulate a graph as it were an adjacency list From 11ed1184e214cfeb69ccef4a8253b7da306c05d4 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 6 Feb 2025 14:36:01 +0100 Subject: [PATCH 14/14] more efficient handling of missing i and j --- R/indexing.R | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/R/indexing.R b/R/indexing.R index bd3e7e58e5..453392e158 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -57,21 +57,31 @@ get_adjacency_submatrix <- function(x, i, j, attr = NULL) { # If i or j is NULL, assume all nodes # if not NULL make sure to handle duplicates correctly if (missing(i)) { - i <- seq_len(vcount(x)) + i_seq <- seq_len(vcount(x)) + has_i <- FALSE + } else{ + i_seq <- i + has_i <- TRUE } if (missing(j)) { - j <- seq_len(vcount(x)) + j_seq <- seq_len(vcount(x)) + has_j <- FALSE + } else { + j_seq <- j + has_j <- TRUE } - adj <- adjacent_vertices(x, i, mode = "out") + adj <- adjacent_vertices(x, i_seq, mode = "out") i_degree <- map_int(adj, length) - from_id <- rep(i, i_degree) + from_id <- rep(i_seq, i_degree) to_id <- unlist(adj) edge_list <- data.frame(from = as.integer(from_id), to = as.integer(to_id)) - edge_list <- edge_list[edge_list$to %in% j, ] - + if(has_j){ + edge_list <- edge_list[edge_list$to %in% j_seq, ] + } + row_indices <- edge_list[[1]] col_indices <- edge_list[[2]] @@ -82,17 +92,16 @@ get_adjacency_submatrix <- function(x, i, j, attr = NULL) { edge_attr(x, attr, valid_edges) } - res <- Matrix::sparseMatrix( - i = match(row_indices, i), - j = match(col_indices, j), + i = if (has_i) match(row_indices, i_seq) else row_indices, + j = if (has_j) match(col_indices, j_seq) else col_indices, x = values, - dims = c(length(i), length(j)) + dims = c(length(i_seq), length(j_seq)) ) if ("name" %in% vertex_attr_names(x) && !is.null(dim(res))) { - rownames(res) <- vertex_attr(x, "name", i) - colnames(res) <- vertex_attr(x, "name", j) + rownames(res) <- vertex_attr(x, "name", i_seq) + colnames(res) <- vertex_attr(x, "name", j_seq) } res