Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph explore #48

Merged
merged 15 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Type: Package
Title: Monarch Knowledge Graph Queries
Description: R package for easy access, manipulation, and analysis of
Monarch KG data Resources.
Version: 1.2.2
Version: 1.4.0
URL: https://github.com/monarch-initiative/monarchr
BugReports: https://github.com/monarch-initiative/monarchr/issues
Authors@R:
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
S3method(cypher_query,neo4j_engine)
S3method(cypher_query_df,neo4j_engine)
S3method(edges,tbl_kgx)
S3method(example_graph,file_engine)
S3method(example_graph,neo4j_engine)
S3method(expand,tbl_kgx)
S3method(explode,tbl_kgx)
S3method(fetch_nodes,file_engine)
Expand All @@ -12,11 +14,14 @@ S3method(knit_print,tbl_kgx)
S3method(nodes,tbl_kgx)
S3method(plot,tbl_kgx)
S3method(summarize_neighborhood,tbl_kgx)
S3method(summary,file_engine)
S3method(summary,neo4j_engine)
export("%in_list%")
export("%~%")
export(cypher_query)
export(cypher_query_df)
export(edges)
export(example_graph)
export(expand)
export(explode)
export(fetch_nodes)
Expand Down Expand Up @@ -55,6 +60,7 @@ importFrom(kableExtra,column_spec)
importFrom(kableExtra,kable)
importFrom(kableExtra,kable_styling)
importFrom(neo2R,cypher)
importFrom(neo2R,multicypher)
importFrom(neo2R,startGraph)
importFrom(purrr,map_chr)
importFrom(readr,col_character)
Expand All @@ -68,6 +74,7 @@ importFrom(stringr,str_wrap)
importFrom(tibble,tibble)
importFrom(tidygraph,activate)
importFrom(tidygraph,as_tibble)
importFrom(tidygraph,graph_join)
importFrom(tidygraph,tbl_graph)
importFrom(utils,capture.output)
importFrom(utils,download.file)
Expand Down
17 changes: 17 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
# monarchr 1.4.0

## New features

* `example_graph()` function for engines
* Engine `summary()` now returns named lists of available node categories and edge predicates for convenient auto-completion

# monarchr 1.3.0

## New features

* `summary()` function for engines

## Bug fixes

* fix backend bug in Neo4j table queries not handling default params properly
* add batch queries for Neo4j backend engine
* added summary() for KG engines to summarize total node and edge count information

# monarchr 1.2.2

Expand Down
2 changes: 1 addition & 1 deletion R/cypher_query.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a tbl_kgx graph.
#'
#' @param engine A neo4j KG engine
#' @param query A string representing the Cypher query.
#' @param query A string representing the Cypher query. Multiple queries may be passed as a vector; if so, neo2R::multicypher is used and the result is returned as a single joined graph.
#' @param parameters A list of parameters for the Cypher query. Default is an empty list.
#' @param ... Additional arguments passed to the function.
#' @return The result of the Cypher query as a tbl_kgx graph.
Expand Down
54 changes: 40 additions & 14 deletions R/cypher_query.neo4j_engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ stitch_vectors <- function(x) {
}
}

########### Public functions ###########

#' @export
#' @importFrom neo2R cypher
#' @importFrom tibble tibble
cypher_query.neo4j_engine <- function(engine, query, parameters = NULL, ...) { #
res <- neo2R::cypher(engine$graph_conn, query = query, parameters = parameters, result = "graph")
#' Process neo2R cypher to tbl_kgx
#'
#' Given a result from neo2R::cypher returning KGX-formatted nodes and edges,
#' parse the result to generate a tbl_kgx object, attaching the provided engine.
#'
#' @param res The result from neo2R::cypher with result = "graph"
#' @param engine The engine to attach to the returned graph
#' @return A tbl_kgx
neo2r_to_kgx <- function(res, engine) {
relationship_ids_contained <- as.integer(unlist(res$paths))

res <- stitch_vectors(res)
Expand Down Expand Up @@ -103,13 +105,13 @@ cypher_query.neo4j_engine <- function(engine, query, parameters = NULL, ...) { #
# sapply!
# edges_df[[prop_name]] <- sapply(res$relationships, function(edge) {
edges_df[[prop_name]] <- sapply(res$relationships, function(edge) {
# edge$properties[[prop_name]]
prop_value <- edge$properties[[prop_name]]
if(is.null(prop_value)) {
return(NA)
} else {
return(prop_value)
}
# edge$properties[[prop_name]]
prop_value <- edge$properties[[prop_name]]
if(is.null(prop_value)) {
return(NA)
} else {
return(prop_value)
}
})
}

Expand All @@ -121,3 +123,27 @@ cypher_query.neo4j_engine <- function(engine, query, parameters = NULL, ...) { #
attr(g, "relationship_ids") <- relationship_ids_contained
return(g)
}

########### Public functions ###########

#' @export
#' @importFrom neo2R cypher
#' @importFrom neo2R multicypher
#' @importFrom tibble tibble
#' @importFrom tidygraph graph_join
cypher_query.neo4j_engine <- function(engine, query, parameters = NULL, ...) {
  # Anything other than exactly one query is run as a batch, and the
  # per-query graphs are merged into a single graph.
  if (length(query) != 1) {
    batch <- neo2R::multicypher(engine$graph_conn, queries = query, parameters = parameters, result = "graph")
    pieces <- lapply(batch, neo2r_to_kgx, engine = engine)

    # Start from an empty graph and fold each piece in, left to right,
    # muffling the informational messages emitted while joining.
    merged <- tbl_kgx(nodes = data.frame())
    join_quietly <- function(acc, piece) {
      suppressMessages(tidygraph::graph_join(acc, piece), classes = "message")
    }
    return(Reduce(join_quietly, pieces, merged))
  }

  # Exactly one query: run it directly and convert the result.
  single <- neo2R::cypher(engine$graph_conn, query = query, parameters = parameters, result = "graph")
  return(neo2r_to_kgx(single, engine = engine))
}
8 changes: 4 additions & 4 deletions R/cypher_query_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#' This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a data frame.
#'
#' @param engine A neo4j_engine() or derivative providing access to a Neo4j database.
#' @param query A string representing the Cypher query, which should return a table.
#' @param parameters A list of parameters for the Cypher query. Default is an empty list.
#' @param query A string representing the Cypher query, which should return a table. Multiple queries may be passed as a vector; if so, neo2R::multicypher is used and the result is returned as a list of data frames.
#' @param parameters A list of parameters for the Cypher query, if required.
#' @param ... Additional arguments passed to the function.
#' @return The result of the Cypher query as a data frame.
#' @return The result of the Cypher query as a data frame, or a list of data frames if multiple queries are passed.
#' @export
#' @examplesIf monarch_engine_check()
#' engine <- monarch_engine()
Expand All @@ -17,6 +17,6 @@
#' result <- cypher_query_df(engine, query, parameters)
#' print(result)
#' @importFrom neo2R cypher
cypher_query_df <- function(engine, query, parameters = list(), ...) {
cypher_query_df <- function(engine, query, parameters = NULL, ...) {
# S3 generic: hands off to the cypher_query_df method for the class of `engine`.
UseMethod("cypher_query_df")
}
9 changes: 7 additions & 2 deletions R/cypher_query_df.neo4j_engine.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
#' @export
#' @importFrom neo2R cypher
cypher_query_df.neo4j_engine <- function(engine, query, parameters = list(), ...) {
#' @importFrom neo2R multicypher
cypher_query_df.neo4j_engine <- function(engine, query, parameters = NULL, ...) {
# A single query is sent through neo2R::cypher; a vector of several queries
# is sent through neo2R::multicypher. Both calls use engine$graph_conn and
# request row-shaped results with arraysAsStrings = FALSE.
if(length(query) == 1) {
result <- neo2R::cypher(engine$graph_conn, query = query, parameters = parameters, result = "row", arraysAsStrings = FALSE)
} else {
result <- neo2R::multicypher(engine$graph_conn, queries = query, parameters = parameters, result = "row", arraysAsStrings = FALSE)
}

# Whatever the neo2R call produced is returned unchanged.
return(result)
return(result)
}
31 changes: 31 additions & 0 deletions R/example_graph.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#' Build a small example graph from a KG engine.
#'
#' Given a KG engine, returns a graph meant for browsing the variety of node
#' categories and edge predicates the engine offers. The result is guaranteed
#' to hold at least one node of every category and at least one edge of every
#' predicate. Nothing further is promised: the graph is not the smallest one
#' meeting these criteria, it is not random or even pseudo-random, and it may
#' not be connected.
#'
#' @param engine A KG engine object
#' @param ... Other parameters (not used)
#' @return A tbl_kgx graph
#' @export
#' @examples
#' # Using example KGX file packaged with monarchr
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
#'
#' # Retrieve and print an example graph:
#' g <- file_engine(filename) |> example_graph()
#' print(g)
#'
#' @examplesIf monarch_engine_check()
#' # Retrieve and print an example graph:
#' g <- monarch_engine() |> example_graph()
#' print(g)
#' @import tidygraph
#' @import dplyr
example_graph <- function(engine, ...) {
  # S3 generic: the method is selected from the class of `engine`.
  UseMethod("example_graph", engine)
}

99 changes: 99 additions & 0 deletions R/example_graph.file_engine.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#' Return an example graph from a file-based KG engine.
#'
#' Given a KGX file-based KG engine, returns a graph representing the diversity
#' of node categories and edge predicates for browsing. The returned graph is guaranteed to
#' contain at least one node of every category, and at least one edge of every
#' predicate. No other guarantees are made: the example graph is not minimal
#' to satisfy these criteria, it is not random or even pseudo-random, and it
#' may not be connected.
#'
#' @param engine A `file_engine` object
#' @param ... Other parameters (not used)
#' @return A tbl_kgx graph
#' @export
#' @examples
#' # Using example KGX file packaged with monarchr
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
#'
#' # Retrieve and print an example graph:
#' g <- file_engine(filename) |> example_graph()
#' print(g)
#' @import tidygraph
#' @import dplyr
example_graph.file_engine <- function(engine, ...) {
  # Pull the complete edge and node tables out of the engine's graph.
  edges_df <- engine$graph |>
    activate(edges) |>
    as.data.frame()

  nodes_df <- engine$graph |>
    activate(nodes) |>
    as.data.frame()

  # Step 1: take the first edge of every predicate, so each predicate is
  # represented. The from/to columns are dropped before rebuilding a graph;
  # presumably tbl_kgx() re-derives them from subject/object - TODO confirm.
  sample_edges <- edges_df |>
    group_by(predicate) |>
    slice(1) |>
    ungroup() |>
    select(-to, -from)

  sample_nodes <- nodes_df |>
    filter(id %in% sample_edges$subject | id %in% sample_edges$object)

  sample_preds_graph <- tbl_kgx(nodes = sample_nodes, edges = sample_edges, attach_engine = engine)

  # Step 2: the predicate sample might not represent all categories, so work
  # out which categories are covered so far...
  used_categories <- sample_preds_graph |>
    activate(nodes) |>
    as.data.frame() |>
    pull(category) |>
    unlist() |>
    unique()

  # ...and which categories exist at all (category is a list column).
  all_node_categories <- nodes_df |>
    pull(category) |>
    unlist() |>
    unique()

  needed_categories <- setdiff(all_node_categories, used_categories)

  # Step 3: for each missing category pick the first node carrying it.
  # unique() because a single node may cover several missing categories.
  sample_cats_node_ids <- needed_categories |>
    lapply(function(category_name) {
      has_cat_rows <- which(category_name %in_list% nodes_df$category)
      nodes_df$id[has_cat_rows[1]]
    }) |>
    unlist() |>
    unique()

  # Grab one arbitrary edge per chosen node (the node may be subject or
  # object). A node without any edge contributes a zero-row table here.
  sample_cats_edges_list <- sample_cats_node_ids |>
    lapply(function(node_id) {
      edges_df |>
        filter(node_id == subject | node_id == object) |>
        head(n = 1) |>
        select(-to, -from)
    })
  sample_cats_edges <- do.call(rbind, sample_cats_edges_list)

  # Collect the node ids to keep. The chosen nodes themselves are included
  # explicitly: a chosen node with no edges appears in no subject/object
  # column and would otherwise be lost, leaving its category unrepresented.
  sample_cats_all_ids <- c(
    sample_cats_node_ids,
    sample_cats_edges$subject,
    sample_cats_edges$object
  ) |>
    unique()

  sample_cats_nodes <- nodes_df |>
    filter(id %in% sample_cats_all_ids)

  # Step 4: build the category sample graph and join it with the predicate
  # sample, muffling the informational messages emitted while joining.
  sample_cats_graph <- tbl_kgx(nodes = sample_cats_nodes, edges = sample_cats_edges, attach_engine = engine)

  all <- suppressMessages(kg_join(sample_cats_graph, sample_preds_graph), classes = "message")
  return(all)
}
56 changes: 56 additions & 0 deletions R/example_graph.neo4j_engine.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#' Return an example graph from a Neo4j KG engine.
#'
#' Given a KGX Neo4j KG engine, returns a graph representing the diversity
#' of node categories and edge predicates for browsing. The returned graph is guaranteed to
#' contain at least one node of every category, and at least one edge of every
#' predicate. No other guarantees are made: the example graph is not minimal
#' to satisfy these criteria, it is not random or even pseudo-random, and it
#' may not be connected.
#'
#' @param engine A `neo4j_engine` object
#' @param ... Other parameters (not used)
#' @return A tbl_kgx graph
#' @export
#' @examplesIf monarch_engine_check()
#' # Retrieve and print an example graph:
#' g <- monarch_engine() |> example_graph()
#' print(g)
#' @import tidygraph
#' @import dplyr
example_graph.neo4j_engine <- function(engine, ...) {
  # Step 1: discover the available edge types (predicates) from the schema.
  pred_types_query <- paste(
    "CALL db.schema.visualization() YIELD relationships",
    "UNWIND relationships AS rel",
    "RETURN DISTINCT type(rel) AS predicate",
    sep = "\n"
  )
  pred_types <- cypher_query_df(engine, pred_types_query)

  # Step 2: one query per predicate, each fetching a single matching edge and
  # its endpoints; passing the vector of queries yields one combined graph.
  # NOTE(review): if the schema reports no predicates, paste0() still yields
  # one query with an empty relationship type - confirm whether that case
  # needs handling.
  sample_preds_query <- paste0("MATCH (a)-[r:`", pred_types$predicate, "`]->(b) RETURN a, b, r LIMIT 1")
  sample_preds_graph <- cypher_query(engine, query = sample_preds_query)

  # The predicate sample might not represent all categories, so compute the
  # categories covered so far (category is a list column, hence unlist()).
  used_categories <- sample_preds_graph |>
    activate(nodes) |>
    as.data.frame() |>
    pull(category) |>
    unlist() |>
    unique()

  # Step 3: list every category (label) known to the database.
  categories_query <- "CALL db.labels() YIELD label RETURN DISTINCT label"
  all_node_categories <- cypher_query_df(engine, categories_query)$label

  needed_categories <- setdiff(all_node_categories, used_categories)

  # Nothing missing: the predicate sample already covers every category.
  # Returning here matters because paste0() treats a zero-length vector as ""
  # and would otherwise build a single query with an empty label.
  if (length(needed_categories) == 0) {
    return(sample_preds_graph)
  }

  # Step 4: for each missing category fetch one node of that category along
  # with an arbitrary connection in either direction.
  sample_cats_query <- paste0("MATCH (a:`", needed_categories, "`) -[r]- (b) RETURN a, r, b LIMIT 1")
  sample_new_cats <- cypher_query(engine, query = sample_cats_query)

  # Step 5: join both samples, muffling the informational join messages.
  full_sample <- suppressMessages(kg_join(sample_preds_graph, sample_new_cats), classes = "message")

  return(full_sample)
}
Loading
Loading