-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
dm as the data model engine #94
Comments
Methods for library(silicate)
as_dm.SC <- function(x) {
unclass(x) %>% as_dm() %>%
cdm_add_pk(object, object_) %>%
cdm_add_fk(object_link_edge, object_, object) %>%
cdm_add_pk(edge, edge_) %>%
cdm_add_fk(object_link_edge, edge_, edge) %>%
cdm_add_pk(vertex, vertex_) %>%
cdm_add_fk(edge, .vx0, vertex) %>%
cdm_add_fk(edge, .vx1, vertex)
}
as_dm.TRI <- function(x) {
unclass(x) %>% as_dm() %>%
cdm_add_pk(object, object_) %>%
cdm_add_fk(triangle, object_, object) %>%
cdm_add_pk(vertex, vertex_) %>%
cdm_add_fk(triangle, .vx0, vertex) %>%
cdm_add_fk(triangle, .vx1, vertex) %>%
cdm_add_fk(triangle, .vx2, vertex)
}
as_dm.ARC <- function(x) {
unclass(x) %>% as_dm() %>%
cdm_add_pk(object, object_) %>%
cdm_add_fk(object_link_arc, object_, object) %>%
## here we need table normalization (somehow, possibly by composing ARC from SC)
cdm_add_pk(arc_link_vertex, arc_, check = FALSE) %>%
cdm_add_pk(vertex, vertex_) %>%
cdm_add_fk(arc_link_vertex, vertex_, vertex)
}
as_dm.PATH <- function(x) {
unclass(x) %>% as_dm() %>%
cdm_add_pk(object, object_) %>%
cdm_add_fk(path, object_, object) %>%
## here we need table normalization (somehow, possibly by composing PATH from SC)
cdm_add_pk(path_link_vertex, path_, check = FALSE) %>%
cdm_add_pk(vertex, vertex_) %>%
cdm_add_fk(path_link_vertex, vertex_, vertex)
}
library(dm)
as_dm(SC(minimal_mesh))
as_dm(TRI(minimal_mesh))
as_dm(ARC(minimal_mesh))
as_dm(PATH(minimal_mesh))
|
unjoin::unjoin(mtcars, am, gear, carb, key_col = "parent_table")
decompose_table(mtcars, new_id, am, gear, carb) |
That's awesome - I hadn't even seen that |
We don't currently handle cycles (=parallel edges in this case) very well. Would a "longer form" be suitable for the triangle? |
I've actually grappled with that from very early on, and pretty sure I started with a long triangle form. A follow up question, when you declare which table to That originally seemed to me to be the way to go, but maybe it's better to always declare the table in these verbs? What follows is just here as a note to self, it took me a bit of to and fro to get it working, but it's worth exploring. Here I try it out on-the-fly with conversion to and from dm, with a filter on object that culls triangles and vertices: as_dm_TRI_longform <- function(x) {
x <- unclass(x)
x$triangle <- x$triangle %>%
tidyr::pivot_longer(starts_with(".vx"),
names_to = "corner", values_to = "vertex_")
x %>% as_dm() %>%
dm_add_pk(object, object_) %>%
dm_add_fk(triangle, object_, object) %>%
dm_add_pk(vertex, vertex_) %>%
dm_add_fk(triangle, vertex_, vertex)
}
as_TRIlongform_dm <- function(x) {
x <- dm_apply_filters(x) %>% dm_get_tables()
x$triangle <- x$triangle %>%
tidyr::pivot_wider(names_from = corner,
values_from = vertex_) %>%
tidyr::unnest(cols = c(.vx0, .vx1, .vx2))
class(x) <- c("TRI", "sc")
x
}
library(silicate)
#>
#> Attaching package: 'silicate'
#> The following object is masked from 'package:stats':
#>
#> filter
library(dm)
#>
#> Attaching package: 'dm'
#>
#> The following object is masked from 'package:stats':
#>
#> filter
tri <- TRI(minimal_mesh)
## a dm version of TRI
x <- as_dm_TRI_longform(tri)
x
#> ── Table source ─────────────────────────────────────────────────────────
#> src: <environment: R_GlobalEnv>
#> ── Metadata ─────────────────────────────────────────────────────────────
#> Tables: `object`, `triangle`, `vertex`, `meta`
#> Columns: 11
#> Primary keys: 2
#> Foreign keys: 2
#validate_dm(x)
#dm_get_tables(x)
## round-trip
tri_f <- x %>% dm_filter(object, a == 1) %>% as_TRIlongform_dm()
#> Warning: Values in `vertex_` are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list(vertex_ = list)` to suppress this warning.
#> * Use `values_fn = list(vertex_ = length)` to identify where the duplicates arise
#> * Use `values_fn = list(vertex_ = summary_fun)` to summarise duplicates
par(mfrow = c(1, 2))
plot(tri, col = grey.colors(nrow(tri$triangle)))
plot(tri_f, col = grey.colors(nrow(tri_f$triangle))) Created on 2020-03-21 by the reprex package (v0.3.0) |
This is pretty good ## convert TRI to a longform triangle and then to dm
as_dm_TRI_longform <- function(x) {
x <- unclass(x)
x$triangle <- x$triangle %>%
tidyr::pivot_longer(starts_with(".vx"),
names_to = "corner", values_to = "vertex_")
x %>% as_dm() %>%
dm_add_pk(object, object_) %>%
dm_add_fk(triangle, object_, object) %>%
dm_add_pk(vertex, vertex_) %>%
dm_add_fk(triangle, vertex_, vertex)
}
## convert a dm with longform TRI to TRI
as_TRIlongform_dm <- function(x) {
x <- x %>% dm_apply_filters() %>% dm_get_tables() %>%
purrr::map(dplyr::collect)
x$triangle <- x$triangle %>%
tidyr::pivot_wider(names_from = corner,
values_from = vertex_) %>%
tidyr::unnest(cols = c(.vx0, .vx1, .vx2))
class(x) <- c("TRI", "sc")
x
}
library(silicate)
#>
#> Attaching package: 'silicate'
#> The following object is masked from 'package:stats':
#>
#> filter
library(dm)
#>
#> Attaching package: 'dm'
#>
#> The following object is masked from 'package:stats':
#>
#> filter
## sf polygons of Provinces, in triangulated form (just because)
tri <- TRI(inlandwaters)
## a dm version of TRI
x <- as_dm_TRI_longform(tri)
x
#> ── Table source ─────────────────────────────────────────────────────────
#> src: <environment: R_GlobalEnv>
#> ── Metadata ─────────────────────────────────────────────────────────────
#> Tables: `object`, `triangle`, `vertex`, `meta`
#> Columns: 12
#> Primary keys: 2
#> Foreign keys: 2
## unlink("afile.sql3")
src <- dplyr::src_sqlite("afile.sql3", create = TRUE)
sc <- copy_dm_to(src, x, temporary = FALSE)
rm(tri, x)
pryr::object_size(sc)
#> Registered S3 method overwritten by 'pryr':
#> method from
#> print.bytes Rcpp
#> 19.2 kB
file.info("afile.sql3")$size/1e6
#> [1] 7.589888
## apply filters to the object, and vertex tables and collect as TRI
tas_north <- sc %>%
dm_filter(object, Province == "Tasmania") %>%
dm_filter(vertex, y_ > -1500000) %>%
as_TRIlongform_dm()
#> Warning: Values in `vertex_` are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list(vertex_ = list)` to suppress this warning.
#> * Use `values_fn = list(vertex_ = length)` to identify where the duplicates arise
#> * Use `values_fn = list(vertex_ = summary_fun)` to summarise duplicates
## in the original sf "Province == "Tasmania" is a widely distributed
## set of islands, particularly the very tiny and to the far
## SE Macquarie Island, so we pick Tas and then zoom up to the north
## of the main islands of the province
par(mfrow = c(1, 2))
library(sf); plot(inlandwaters[5, 1]$geom, col = "grey")
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 7.0.0
plot(tas_north) Created on 2020-03-21 by the reprex package (v0.3.0) |
This looks very promising:
With osmdata we need to turn off checks because
object_
is not unique or exclusive.@mpadge basically what I was trying to do with https://github.com/hypertidy/rbot but obviously much more promising!
The text was updated successfully, but these errors were encountered: