Skip to content

Commit

Permalink
RST_Type RST_UpdateType (#564)
Browse files Browse the repository at this point in the history
* fixing deletion / typing issues for checkpointing

* fix diff

* does it build on GH?

* changed some of the python tests to allow the package to build with checkpointing by default

* updated R tests to allow checkpointing by default

* prep to pass around tmp prefix

* added RST_Type function to get type for each raster band

* added rst_updatetype

* added python bindings and docs

* added R tests

* fix to SQL reg for RST_Transform

* Feature/tin mesh (#569)

* added methods for triangulation and interpolation, data for testing

* updated interpolation function to use an STRTree

* fails due to row size exceeding heap

* working generator for unconstrained triangulation

* conforming triangulation appears to be working

* added st_interpolateelevation expression and updated wkb / wkt writers to respect coordinate dimension

* removed buffer for input masspoints

* first pass at rasterization, added toWKT and toWKB that can explicitly specify coordinate dimensions

* added a GDALRasterize operator to write outputs

* added RST_DTMFromGeoms initial implementation and tests

* fixed issues with interpolation and triangulation split points missing Z values

* updated tests to use 1km subset of masspoints

* fixed failing tests

* ran black on python module

* added python bindings for new functions

* added triangulation and interpolation python tests

* added python test for raster function

* added python test for raster function

* added SparkR tests for vector functions

* added SparkR tests for raster function

* added sparklyr tests for vector functions

* added raster function tests to sparklyr bindings / fixed typo in sql registration for rst_dtmfromgeoms

* description of st_triangulate

* mistake in BNG grid

* grid points are silently dropped if not in triangulated area (will be set equal to NoData value when raster written)

* updated logic of rasterize method to handle empty geometry collections

* hardcoded algorithm tolerance to zero

* check / fix failing test

* small docs updates

* minor docs fix, split out tolerance into two params

* updated docs / added examples for st_triangulate

* added docs for interpolateelevation and dtmfromgeoms

* ran isort, reverted h3 version bump, removed unused import
  • Loading branch information
sllynn authored Sep 24, 2024
1 parent bef8c1b commit ea37dda
Show file tree
Hide file tree
Showing 100 changed files with 3,073 additions and 1,312 deletions.
6 changes: 6 additions & 0 deletions R/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
**/.Rhistory
**/*.tar.gz
**/*.Rproj
/sparklyr-mosaic/metastore_db/
/sparklyr-mosaic/mosaic_checkpoint/
/sparklyr-mosaic/mosaic_tmp/
/sparkr-mosaic/metastore_db/
/sparkr-mosaic/mosaic_checkpoint/
/sparkr-mosaic/mosaic_tmp/
13 changes: 0 additions & 13 deletions R/sparkR-mosaic/SparkR.Rproj

This file was deleted.

2 changes: 1 addition & 1 deletion R/sparkR-mosaic/sparkrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: This package extends SparkR to bring the Databricks Mosaic for geos
License: Databricks
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Collate:
'enableGDAL.R'
'enableMosaic.R'
Expand Down
Binary file not shown.
Binary file not shown.
30 changes: 30 additions & 0 deletions R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ generate_singleband_raster_df <- function() {

test_that("mosaic can read single-band GeoTiff", {
sdf <- generate_singleband_raster_df()

row <- first(sdf)
expect_equal(row$length, 1067862L)
expect_equal(row$x_size, 2400)
Expand All @@ -34,6 +35,8 @@ test_that("scalar raster functions behave as intended", {
sdf <- withColumn(sdf, "rst_scaley", rst_scaley(column("tile")))
sdf <- withColumn(sdf, "rst_srid", rst_srid(column("tile")))
sdf <- withColumn(sdf, "rst_summary", rst_summary(column("tile")))
sdf <- withColumn(sdf, "rst_type", rst_type(column("tile")))
sdf <- withColumn(sdf, "rst_updatetype", rst_updatetype(column("tile"), lit("Float32")))
sdf <- withColumn(sdf, "rst_upperleftx", rst_upperleftx(column("tile")))
sdf <- withColumn(sdf, "rst_upperlefty", rst_upperlefty(column("tile")))
sdf <- withColumn(sdf, "rst_width", rst_width(column("tile")))
Expand Down Expand Up @@ -137,4 +140,31 @@ test_that("the tessellate-join-clip-merge flow works on NetCDF files", {

expect_equal(nrow(merged_precipitation), 1)

})

test_that("a terrain model can be produced from point geometries", {
  # Four 3D points, supplied as WKT strings, act as the input mass points.
  masspoint_wkts <- c(
    "POINT Z (3 2 1)",
    "POINT Z (2 1 0)",
    "POINT Z (1 3 3)",
    "POINT Z (0 2 2)"
  )
  points_sdf <- createDataFrame(data.frame(wkt = masspoint_wkts))

  # Grouping on no columns gathers every WKT string into one array column.
  terrain_sdf <- agg(
    groupBy(points_sdf),
    masspoints = collect_list(column("wkt"))
  )

  # Remaining inputs to rst_dtmfromgeoms, added one column at a time in the
  # order listed here.
  input_columns <- list(
    breaklines = expr("array('LINESTRING EMPTY')"),
    origin = st_geomfromwkt(lit("POINT (0.6 1.8)")),
    xWidth = lit(12L),
    yWidth = lit(6L),
    xSize = lit(0.1),
    ySize = lit(0.1)
  )
  for (column_name in names(input_columns)) {
    terrain_sdf <- withColumn(
      terrain_sdf,
      column_name,
      input_columns[[column_name]]
    )
  }

  dtm_tile <- rst_dtmfromgeoms(
    column("masspoints"),
    column("breaklines"),
    lit(0.0),
    lit(0.01),
    column("origin"),
    column("xWidth"),
    column("yWidth"),
    column("xSize"),
    column("ySize")
  )
  terrain_sdf <- withColumn(terrain_sdf, "tile", dtm_tile)

  # The aggregation above should leave exactly one row carrying the tile.
  expect_equal(SparkR::count(terrain_sdf), 1)
})
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,46 @@ test_that("aggregate vector functions behave as intended", {
expect_true(first(sdf.intersection)$comparison_intersects)
expect_true(first(sdf.intersection)$comparison_intersection)

})
})

test_that("triangulation / interpolation functions behave as intended", {
  # Four 3D points, supplied as WKT strings, act as the input mass points.
  masspoint_wkts <- c(
    "POINT Z (3 2 1)",
    "POINT Z (2 1 0)",
    "POINT Z (1 3 3)",
    "POINT Z (0 2 2)"
  )

  # Grouping on no columns gathers every WKT string into one array column.
  masspoints_sdf <- agg(
    groupBy(createDataFrame(data.frame(wkt = masspoint_wkts))),
    masspoints = collect_list(column("wkt"))
  )
  masspoints_sdf <- withColumn(
    masspoints_sdf,
    "breaklines",
    expr("array('LINESTRING EMPTY')")
  )

  # --- Triangulation: the test expects one output row per triangle. ---
  triangles_col <- st_triangulate(
    column("masspoints"),
    column("breaklines"),
    lit(0.0),
    lit(0.01)
  )
  triangulation_sdf <- withColumn(masspoints_sdf, "triangles", triangles_col)
  cache(triangulation_sdf)
  expect_equal(SparkR::count(triangulation_sdf), 2)

  expected_triangles <- c(
    "POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))",
    "POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))"
  )
  expect_contains(expected_triangles, first(triangulation_sdf)$triangles)

  # --- Interpolation: the test expects 6 * 12 output rows. ---
  grid_columns <- list(
    origin = st_geomfromwkt(lit("POINT (0.6 1.8)")),
    xWidth = lit(12L),
    yWidth = lit(6L),
    xSize = lit(0.1),
    ySize = lit(0.1)
  )
  interpolation_sdf <- masspoints_sdf
  for (column_name in names(grid_columns)) {
    interpolation_sdf <- withColumn(
      interpolation_sdf,
      column_name,
      grid_columns[[column_name]]
    )
  }

  interpolated_col <- st_interpolateelevation(
    column("masspoints"),
    column("breaklines"),
    lit(0.0),
    lit(0.01),
    column("origin"),
    column("xWidth"),
    column("yWidth"),
    column("xSize"),
    column("ySize")
  )
  interpolation_sdf <- withColumn(
    interpolation_sdf,
    "interpolated",
    interpolated_col
  )
  cache(interpolation_sdf)
  expect_equal(SparkR::count(interpolation_sdf), 6 * 12)
  expect_contains(
    collect(interpolation_sdf)$interpolated,
    "POINT Z(0.6 2 1.8)"
  )
})
2 changes: 1 addition & 1 deletion R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: This package extends sparklyr to bring the Databricks Mosaic for ge
License: Databricks
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Collate:
'enableGDAL.R'
'enableMosaic.R'
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ test_that("scalar raster functions behave as intended", {
mutate(rst_boundingbox = rst_boundingbox(tile)) %>%
mutate(rst_boundingbox = st_buffer(rst_boundingbox, -0.001)) %>%
mutate(rst_clip = rst_clip(tile, rst_boundingbox)) %>%
mutate(rst_combineavg = rst_combineavg(array(tile, rst_clip))) %>%
mutate(rst_frombands = rst_frombands(array(tile, tile))) %>%
mutate(rst_fromfile = rst_fromfile(path, -1L)) %>%
mutate(rst_combineavg = rst_combineavg(array(rst_fromfile, rst_clip))) %>%
mutate(rst_frombands = rst_frombands(array(tile, tile))) %>%
mutate(rst_georeference = rst_georeference(tile)) %>%
mutate(rst_getnodata = rst_getnodata(tile)) %>%
mutate(rst_subdatasets = rst_subdatasets(tile)) %>%
Expand Down Expand Up @@ -63,6 +63,8 @@ test_that("scalar raster functions behave as intended", {
mutate(rst_scaley = rst_scaley(tile)) %>%
mutate(rst_srid = rst_srid(tile)) %>%
mutate(rst_summary = rst_summary(tile)) %>%
mutate(rst_type = rst_type(tile)) %>%
mutate(rst_updatetype = rst_updatetype(tile, "Float32")) %>%
mutate(rst_upperleftx = rst_upperleftx(tile)) %>%
mutate(rst_upperlefty = rst_upperlefty(tile)) %>%
mutate(rst_width = rst_width(tile)) %>%
Expand Down Expand Up @@ -173,9 +175,46 @@ test_that("the tessellate-join-clip-merge flow works on NetCDF files", {
inner_join(census_sdf, by = "index_id") %>%
mutate(tile = rst_clip(tile, wkb))


merged_precipitation <- clipped_sdf %>%
group_by(region_keys, timestep) %>%
summarise(tile = rst_merge_agg(tile))

expect_equal(sdf_nrow(merged_precipitation), 1)
})

test_that("a terrain model can be produced from point geometries", {
  # Four 3D points, supplied as WKT strings, act as the input mass points.
  masspoints_df <- data.frame(
    wkt = c(
      "POINT Z (3 2 1)",
      "POINT Z (2 1 0)",
      "POINT Z (1 3 3)",
      "POINT Z (0 2 2)"
    )
  )

  sdf <- sdf_copy_to(sc, masspoints_df) %>%
    group_by() %>%
    # `wkt` must be a bare column name: a quoted "wkt" is translated to a SQL
    # string literal, so the aggregate would collect the text 'wkt' instead of
    # the point geometries.
    summarise(masspoints = collect_list(wkt)) %>%
    mutate(
      breaklines = array("LINESTRING EMPTY"),
      origin = st_geomfromwkt("POINT (0.6 1.8)"),
      xWidth = 12L,
      yWidth = 6L,
      xSize = as.double(0.1),
      ySize = as.double(0.1),
      tile = rst_dtmfromgeoms(
        masspoints,
        breaklines,
        as.double(0.0),
        as.double(0.01),
        origin,
        xWidth,
        yWidth,
        xSize,
        ySize
      )
    )

  # The ungrouped summarise should leave exactly one row carrying the tile.
  expect_equal(sdf_nrow(sdf), 1)
})
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
options(warn = -1)

test_that("scalar vector functions behave as intended", {

sdf_raw <- sdf_copy_to(
sc,
data.frame(
wkt = "POLYGON ((2 1, 1 2, 2 3, 2 1))",
point_wkt = "POINT (1 1)"
point_wkt = "POINT (1 1)")
)
)

sdf <- sdf_raw %>% mutate(
st_area = st_area(wkt),
Expand All @@ -24,7 +22,12 @@ test_that("scalar vector functions behave as intended", {
st_rotate = st_rotate(wkt, 1L),
st_centroid = st_centroid(wkt),
st_numpoints = st_numpoints(wkt),
st_haversine = st_haversine(as.double(0.0), as.double(90.0), as.double(0.0), as.double(0.0)),
st_haversine = st_haversine(
as.double(0.0),
as.double(90.0),
as.double(0.0),
as.double(0.0)
),
st_isvalid = st_isvalid(wkt),
st_hasvalidcoordinates = st_hasvalidcoordinates(wkt, "EPSG:2192", "bounds"),
st_intersects = st_intersects(wkt, wkt),
Expand Down Expand Up @@ -75,33 +78,28 @@ test_that("scalar vector functions behave as intended", {
})

test_that("aggregate vector functions behave as intended", {

inputGJ <- read_file("data/boroughs.geojson")
sdf <- sdf_sql(sc, "SELECT id as location_id FROM range(1)") %>%
mutate(geometry = st_geomfromgeojson(inputGJ))
expect_equal(sdf_nrow(sdf), 1)

sdf.l <- sdf %>%
select(
left_id = location_id,
left_geom = geometry
) %>%
select(left_id = location_id, left_geom = geometry) %>%
mutate(left_index = mosaic_explode(left_geom, 11L))

sdf.r <- sdf %>%
select(
right_id = location_id,
right_geom = geometry
) %>%
select(right_id = location_id, right_geom = geometry) %>%
mutate(right_geom = st_translate(
right_geom,
st_area(right_geom) * runif(n()) * 0.1,
st_area(right_geom) * runif(n()) * 0.1)
) %>%
st_area(right_geom) * runif(n()) * 0.1
)) %>%
mutate(right_index = mosaic_explode(right_geom, 11L))

sdf.intersection <- sdf.l %>%
inner_join(sdf.r, by = c("left_index" = "right_index"), keep = TRUE) %>%
inner_join(sdf.r,
by = c("left_index" = "right_index"),
keep = TRUE) %>%
group_by(left_id, right_id) %>%
summarise(
agg_intersects = st_intersects_agg(left_index, right_index),
Expand All @@ -124,3 +122,46 @@ test_that("aggregate vector functions behave as intended", {


})

test_that("triangulation and interpolation functions behave as intended", {
  # Four 3D points, supplied as WKT strings, act as the input mass points.
  sdf <- sdf_copy_to(sc, data.frame(
    wkt = c(
      "POINT Z (3 2 1)",
      "POINT Z (2 1 0)",
      "POINT Z (1 3 3)",
      "POINT Z (0 2 2)"
    )
  ))

  # Gather every point into a single array column and add an empty breakline.
  sdf <- sdf %>%
    group_by() %>%
    summarise(masspoints = collect_list(wkt)) %>%
    mutate(breaklines = array("LINESTRING EMPTY"))

  triangulation_sdf <- sdf %>%
    mutate(
      triangles = st_triangulate(
        masspoints,
        breaklines,
        as.double(0.00),
        as.double(0.01)
      )
    )

  # The test expects one output row per triangle.
  expect_equal(sdf_nrow(triangulation_sdf), 2)

  expected <- c(
    "POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))",
    "POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))"
  )
  # R vectors are 1-indexed: `[0]` yields an empty vector, which would make
  # this expectation pass without checking anything. Inspect the first row.
  expect_contains(expected, sdf_collect(triangulation_sdf)$triangles[1])

  interpolation_sdf <- sdf %>%
    mutate(
      origin = st_geomfromwkt("POINT (0.6 1.8)"),
      xWidth = 12L,
      yWidth = 6L,
      xSize = as.double(0.1),
      ySize = as.double(0.1),
      interpolated = st_interpolateelevation(
        masspoints,
        breaklines,
        as.double(0.0),
        as.double(0.01),
        origin,
        xWidth,
        yWidth,
        xSize,
        ySize
      )
    )

  # The test expects 6 * 12 interpolated points, one row each.
  expect_equal(sdf_nrow(interpolation_sdf), 6 * 12)
  expect_contains(
    sdf_collect(interpolation_sdf)$interpolated,
    "POINT Z(0.6 2 1.8)"
  )
})
Loading

0 comments on commit ea37dda

Please sign in to comment.