diff --git a/R/.gitignore b/R/.gitignore index f5b37c1ec..a5dfb7e50 100644 --- a/R/.gitignore +++ b/R/.gitignore @@ -1,3 +1,9 @@ **/.Rhistory **/*.tar.gz +**/*.Rproj /sparklyr-mosaic/metastore_db/ +/sparklyr-mosaic/mosaic_checkpoint/ +/sparklyr-mosaic/mosaic_tmp/ +/sparkr-mosaic/metastore_db/ +/sparkr-mosaic/mosaic_checkpoint/ +/sparkr-mosaic/mosaic_tmp/ \ No newline at end of file diff --git a/R/sparkR-mosaic/SparkR.Rproj b/R/sparkR-mosaic/SparkR.Rproj deleted file mode 100644 index 8e3c2ebc9..000000000 --- a/R/sparkR-mosaic/SparkR.Rproj +++ /dev/null @@ -1,13 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX diff --git a/R/sparkR-mosaic/sparkrMosaic/DESCRIPTION b/R/sparkR-mosaic/sparkrMosaic/DESCRIPTION index 2ea7718f9..c740cc011 100644 --- a/R/sparkR-mosaic/sparkrMosaic/DESCRIPTION +++ b/R/sparkR-mosaic/sparkrMosaic/DESCRIPTION @@ -8,7 +8,7 @@ Description: This package extends SparkR to bring the Databricks Mosaic for geos License: Databricks Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Collate: 'enableGDAL.R' 'enableMosaic.R' diff --git a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/data/sd46_dtm_breakline.zip b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/data/sd46_dtm_breakline.zip new file mode 100644 index 000000000..4f7f3d57a Binary files /dev/null and b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/data/sd46_dtm_breakline.zip differ diff --git a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/data/sd46_dtm_point.zip b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/data/sd46_dtm_point.zip new file mode 100644 index 000000000..825fff818 Binary files /dev/null and b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/data/sd46_dtm_point.zip differ diff --git a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R index 6e23454dc..6e3ea147e 100644 --- a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R +++ b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R @@ -8,6 +8,7 @@ generate_singleband_raster_df <- function() { test_that("mosaic can read single-band GeoTiff", { sdf <- generate_singleband_raster_df() + row <- first(sdf) expect_equal(row$length, 1067862L) expect_equal(row$x_size, 2400) @@ -34,6 +35,8 @@ test_that("scalar raster functions behave as intended", { sdf <- withColumn(sdf, "rst_scaley", rst_scaley(column("tile"))) sdf <- withColumn(sdf, "rst_srid", rst_srid(column("tile"))) sdf <- withColumn(sdf, "rst_summary", rst_summary(column("tile"))) + sdf <- withColumn(sdf, "rst_type", rst_type(column("tile"))) + sdf <- withColumn(sdf, "rst_updatetype", rst_updatetype(column("tile"), lit("Float32"))) sdf <- withColumn(sdf, "rst_upperleftx", rst_upperleftx(column("tile"))) sdf <- withColumn(sdf, "rst_upperlefty", rst_upperlefty(column("tile"))) sdf <- withColumn(sdf, "rst_width", rst_width(column("tile"))) @@ -137,4 +140,31 @@ test_that("the tessellate-join-clip-merge flow works on NetCDF files", { expect_equal(nrow(merged_precipitation), 1) +}) + +test_that("a terrain model can be produced from point geometries", { + +sdf <- createDataFrame( + data.frame( + wkt = c( + "POINT Z (3 2 1)", + "POINT Z (2 1 0)", + "POINT Z (1 3 3)", + "POINT Z (0 2 2)" + ) + ) +) + +sdf <- agg(groupBy(sdf), masspoints = collect_list(column("wkt"))) +sdf <- withColumn(sdf, "breaklines", 
expr("array('LINESTRING EMPTY')")) +sdf <- withColumn(sdf, "origin", st_geomfromwkt(lit("POINT (0.6 1.8)"))) +sdf <- withColumn(sdf, "xWidth", lit(12L)) +sdf <- withColumn(sdf, "yWidth", lit(6L)) +sdf <- withColumn(sdf, "xSize", lit(0.1)) +sdf <- withColumn(sdf, "ySize", lit(0.1)) +sdf <- withColumn(sdf, "tile", rst_dtmfromgeoms( +column("masspoints"), column("breaklines"), lit(0.0), lit(0.01), +column("origin"), column("xWidth"), column("yWidth"), column("xSize"), column("ySize")) +) +expect_equal(SparkR::count(sdf), 1) }) \ No newline at end of file diff --git a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R index 154a4cb0f..4294e1a15 100644 --- a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R +++ b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R @@ -93,4 +93,46 @@ test_that("aggregate vector functions behave as intended", { expect_true(first(sdf.intersection)$comparison_intersects) expect_true(first(sdf.intersection)$comparison_intersection) -}) \ No newline at end of file +}) + +test_that("triangulation / interpolation functions behave as intended", { +sdf <- createDataFrame( + data.frame( + wkt = c( + "POINT Z (3 2 1)", + "POINT Z (2 1 0)", + "POINT Z (1 3 3)", + "POINT Z (0 2 2)" + ) + ) +) + +sdf <- agg(groupBy(sdf), masspoints = collect_list(column("wkt"))) +sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')")) +triangulation_sdf <- withColumn(sdf, "triangles", st_triangulate(column("masspoints"), column("breaklines"), lit(0.0), lit(0.01))) +cache(triangulation_sdf) +expect_equal(SparkR::count(triangulation_sdf), 2) +expected <- c("POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))", "POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))") +expect_contains(expected, first(triangulation_sdf)$triangles) + +interpolation_sdf <- sdf +interpolation_sdf <- withColumn(interpolation_sdf, "origin", st_geomfromwkt(lit("POINT (0.6 1.8)"))) +interpolation_sdf <- withColumn(interpolation_sdf, "xWidth", lit(12L)) +interpolation_sdf <- withColumn(interpolation_sdf, "yWidth", lit(6L)) +interpolation_sdf <- withColumn(interpolation_sdf, "xSize", lit(0.1)) +interpolation_sdf <- withColumn(interpolation_sdf, "ySize", lit(0.1)) +interpolation_sdf <- withColumn(interpolation_sdf, "interpolated", st_interpolateelevation( + column("masspoints"), + column("breaklines"), + lit(0.0), + lit(0.01), + column("origin"), + column("xWidth"), + column("yWidth"), + column("xSize"), + column("ySize") +)) +cache(interpolation_sdf) +expect_equal(SparkR::count(interpolation_sdf), 6 * 12) +expect_contains(collect(interpolation_sdf)$interpolated, "POINT Z(0.6 2 1.8)") +}) diff --git a/R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION b/R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION index 4ce2f2be1..7fa449710 100644 --- a/R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION +++ b/R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION @@ -8,7 +8,7 @@ Description: This package extends sparklyr to bring the Databricks Mosaic for ge License: Databricks Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Collate: 'enableGDAL.R' 'enableMosaic.R' diff --git a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/data/sd46_dtm_breakline.zip b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/data/sd46_dtm_breakline.zip new file mode 100644 index 000000000..4f7f3d57a Binary files /dev/null and b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/data/sd46_dtm_breakline.zip differ diff --git 
a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/data/sd46_dtm_point.zip b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/data/sd46_dtm_point.zip new file mode 100644 index 000000000..825fff818 Binary files /dev/null and b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/data/sd46_dtm_point.zip differ diff --git a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testRasterFunctions.R b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testRasterFunctions.R index 3cf016fa7..ea28a44d6 100644 --- a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testRasterFunctions.R +++ b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testRasterFunctions.R @@ -29,9 +29,9 @@ test_that("scalar raster functions behave as intended", { mutate(rst_boundingbox = rst_boundingbox(tile)) %>% mutate(rst_boundingbox = st_buffer(rst_boundingbox, -0.001)) %>% mutate(rst_clip = rst_clip(tile, rst_boundingbox)) %>% - mutate(rst_combineavg = rst_combineavg(array(tile, rst_clip))) %>% - mutate(rst_frombands = rst_frombands(array(tile, tile))) %>% mutate(rst_fromfile = rst_fromfile(path, -1L)) %>% + mutate(rst_combineavg = rst_combineavg(array(rst_fromfile, rst_clip))) %>% + mutate(rst_frombands = rst_frombands(array(tile, tile))) %>% mutate(rst_georeference = rst_georeference(tile)) %>% mutate(rst_getnodata = rst_getnodata(tile)) %>% mutate(rst_subdatasets = rst_subdatasets(tile)) %>% @@ -63,6 +63,8 @@ test_that("scalar raster functions behave as intended", { mutate(rst_scaley = rst_scaley(tile)) %>% mutate(rst_srid = rst_srid(tile)) %>% mutate(rst_summary = rst_summary(tile)) %>% + mutate(rst_type = rst_type(tile)) %>% + mutate(rst_updatetype = rst_updatetype(tile, "Float32")) %>% mutate(rst_upperleftx = rst_upperleftx(tile)) %>% mutate(rst_upperlefty = rst_upperlefty(tile)) %>% mutate(rst_width = rst_width(tile)) %>% @@ -173,9 +175,46 @@ test_that("the tessellate-join-clip-merge flow works on NetCDF files", { inner_join(census_sdf, by = "index_id") %>% mutate(tile = rst_clip(tile, wkb)) + merged_precipitation <- clipped_sdf %>% group_by(region_keys, timestep) %>% summarise(tile = rst_merge_agg(tile)) expect_equal(sdf_nrow(merged_precipitation), 1) +}) + +test_that ("a terrain model can be produced from point geometries", { + + sdf <- sdf_copy_to(sc, data.frame( + wkt = c( + "POINT Z (3 2 1)", + "POINT Z (2 1 0)", + "POINT Z (1 3 3)", + "POINT Z (0 2 2)" + ) + ) + ) %>% + group_by() %>% + summarise(masspoints = collect_list("wkt")) %>% + mutate( + breaklines = array("LINESTRING EMPTY"), + origin = st_geomfromwkt("POINT (0.6 1.8)"), + xWidth = 12L, + yWidth = 6L, + xSize = as.double(0.1), + ySize = as.double(0.1), + tile = rst_dtmfromgeoms( + masspoints, + breaklines, + as.double(0.0), + as.double(0.01), + origin, + xWidth, + yWidth, + xSize, + ySize + ) + ) + expect_equal(sdf_nrow(sdf), 1) + }) \ No newline at end of file diff --git a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R index 7aa5addda..a177b37bf 100644 --- a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R +++ b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R @@ -1,14 +1,12 @@ options(warn = -1) test_that("scalar vector functions behave as intended", { - sdf_raw <- sdf_copy_to( sc, data.frame( wkt = "POLYGON ((2 1, 1 2, 2 3, 2 1))", - point_wkt = "POINT (1 1)" + point_wkt = "POINT (1 1)") ) - ) sdf <- sdf_raw %>% mutate( st_area = st_area(wkt), @@ -24,7 +22,12 @@ test_that("scalar vector functions behave as intended", { 
st_rotate = st_rotate(wkt, 1L),
       st_centroid = st_centroid(wkt),
       st_numpoints = st_numpoints(wkt),
-      st_haversine = st_haversine(as.double(0.0), as.double(90.0), as.double(0.0), as.double(0.0)),
+      st_haversine = st_haversine(
+        as.double(0.0),
+        as.double(90.0),
+        as.double(0.0),
+        as.double(0.0)
+      ),
       st_isvalid = st_isvalid(wkt),
       st_hasvalidcoordinates = st_hasvalidcoordinates(wkt, "EPSG:2192", "bounds"),
       st_intersects = st_intersects(wkt, wkt),
@@ -75,33 +78,28 @@ test_that("scalar vector functions behave as intended", {
 })
 
 test_that("aggregate vector functions behave as intended", {
-
   inputGJ <- read_file("data/boroughs.geojson")
   sdf <- sdf_sql(sc, "SELECT id as location_id FROM range(1)") %>%
     mutate(geometry = st_geomfromgeojson(inputGJ))
   expect_equal(sdf_nrow(sdf), 1)
 
   sdf.l <- sdf %>%
-    select(
-      left_id = location_id,
-      left_geom = geometry
-    ) %>%
+    select(left_id = location_id, left_geom = geometry) %>%
     mutate(left_index = mosaic_explode(left_geom, 11L))
 
   sdf.r <- sdf %>%
-    select(
-      right_id = location_id,
-      right_geom = geometry
-    ) %>%
+    select(right_id = location_id, right_geom = geometry) %>%
     mutate(right_geom = st_translate(
       right_geom,
       st_area(right_geom) * runif(n()) * 0.1,
-      st_area(right_geom) * runif(n()) * 0.1)
-    ) %>%
+      st_area(right_geom) * runif(n()) * 0.1
+    )) %>%
     mutate(right_index = mosaic_explode(right_geom, 11L))
 
   sdf.intersection <- sdf.l %>%
-    inner_join(sdf.r, by = c("left_index" = "right_index"), keep = TRUE) %>%
+    inner_join(sdf.r,
+               by = c("left_index" = "right_index"),
+               keep = TRUE) %>%
     group_by(left_id, right_id) %>%
     summarise(
       agg_intersects = st_intersects_agg(left_index, right_index),
@@ -124,3 +122,46 @@
 
 })
+
+test_that("triangulation and interpolation functions behave as intended", {
+  sdf <- sdf_copy_to(sc, data.frame(
+    wkt = c("POINT Z (3 2 1)", "POINT Z (2 1 0)", "POINT Z (1 3 3)", "POINT Z (0 2 2)")
+  ))
+
+  sdf <- sdf %>%
+    group_by() %>%
+    summarise(masspoints = collect_list(wkt)) %>%
+    mutate(breaklines = array("LINESTRING EMPTY"))
+
+  triangulation_sdf <- sdf %>%
+    mutate(triangles = st_triangulate(masspoints, breaklines, as.double(0.00), as.double(0.01)))
+
+  expect_equal(sdf_nrow(triangulation_sdf), 2)
+
+  expected <- c("POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))",
+                "POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))")
+  expect_contains(expected, sdf_collect(triangulation_sdf)$triangles[1])
+
+  interpolation_sdf <- sdf %>%
+    mutate(
+      origin = st_geomfromwkt("POINT (0.6 1.8)"),
+      xWidth = 12L,
+      yWidth = 6L,
+      xSize = as.double(0.1),
+      ySize = as.double(0.1),
+      interpolated = st_interpolateelevation(
+        masspoints,
+        breaklines,
+        as.double(0.0),
+        as.double(0.01),
+        origin,
+        xWidth,
+        yWidth,
+        xSize,
+        ySize
+      )
+    )
+  expect_equal(sdf_nrow(interpolation_sdf), 6 * 12)
+  expect_contains(sdf_collect(interpolation_sdf)$interpolated,
+                  "POINT Z(0.6 2 1.8)")
+})
diff --git a/docs/source/api/raster-functions.rst b/docs/source/api/raster-functions.rst
index 94daa5efa..0b950dff9 100644
--- a/docs/source/api/raster-functions.rst
+++ b/docs/source/api/raster-functions.rst
@@ -2,6 +2,7 @@
 Raster functions
 =================
+#####
 Intro
 #####
 
 Raster functions are available in mosaic if you have installed the optional dependency `GDAL`.
@@ -482,6 +483,169 @@ rst_derivedband
    | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" }                                                        |
    +----------------------------------------------------------------------------------------------------------------+
 
+
+rst_dtmfromgeoms
+****************
+
+.. function:: rst_dtmfromgeoms(pointsArray, linesArray, mergeTolerance, snapTolerance, origin, xWidth, yWidth, xSize, ySize)
+
+    Generate a raster with interpolated elevations across a grid of points described by:
+
+    - :code:`origin`: a point geometry describing the bottom-left corner of the grid,
+    - :code:`xWidth` and :code:`yWidth`: the number of points in the grid in x and y directions,
+    - :code:`xSize` and :code:`ySize`: the space between grid points in the x and y directions.
+
+    :note: To generate a grid from a "top-left" :code:`origin`, use a negative value for :code:`ySize`.
+
+    The underlying algorithm first creates a surface mesh by triangulating :code:`pointsArray`
+    (including :code:`linesArray` as a set of constraint lines) then determines where each point
+    in the grid would lie on the surface mesh. Finally, it interpolates the
+    elevation of that point based on the surrounding triangle's vertices.
+
+    As with :code:`st_triangulate`, there are two 'tolerance' parameters for the algorithm:
+
+    - :code:`mergeTolerance` sets the point merging tolerance of the triangulation algorithm, i.e. before the initial
+      triangulation is performed, nearby points in :code:`pointsArray` can be merged in order to speed up the triangulation
+      process. A value of zero means all points are considered for triangulation.
+    - :code:`snapTolerance` sets the tolerance for post-processing the results of the triangulation, i.e. matching
+      the vertices of the output triangles to input points / lines. This is necessary as the algorithm often returns null
+      height / Z values. Setting this to a large value may result in the incorrect Z values being assigned to the
+      output triangle vertices (especially when :code:`linesArray` contains very densely spaced segments).
+      Setting this value to zero may result in the output triangle vertices being assigned a null Z value.
+      Both tolerance parameters are expressed in the same units as the projection of the input point geometries.
+
+    The result is a single raster tile per row of input geometries, containing the interpolated elevations across the full extent of the grid.
+
+    :param pointsArray: Array of geometries representing the points to be triangulated
+    :type pointsArray: Column (ArrayType(Geometry))
+    :param linesArray: Array of geometries representing the lines to be used as constraints
+    :type linesArray: Column (ArrayType(Geometry))
+    :param mergeTolerance: A tolerance used to coalesce points in close proximity to each other before performing triangulation.
+    :type mergeTolerance: Column (DoubleType)
+    :param snapTolerance: A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation.
+    :type snapTolerance: Column (DoubleType)
+    :param origin: A point geometry describing the bottom-left corner of the grid.
+    :type origin: Column (Geometry)
+    :param xWidth: The number of points in the grid in x direction.
+    :type xWidth: Column (IntegerType)
+    :param yWidth: The number of points in the grid in y direction.
+    :type yWidth: Column (IntegerType)
+    :param xSize: The spacing between each point on the grid's x-axis.
+    :type xSize: Column (DoubleType)
+    :param ySize: The spacing between each point on the grid's y-axis.
+    :type ySize: Column (DoubleType)
+    :rtype: Column (RasterTileType)
+
+    :example:
+
+.. tabs::
+    .. 
code-tab:: py + + df = ( + spark.createDataFrame( + [ + ["POINT Z (2 1 0)"], + ["POINT Z (3 2 1)"], + ["POINT Z (1 3 3)"], + ["POINT Z (0 2 2)"], + ], + ["wkt"], + ) + .groupBy() + .agg(collect_list("wkt").alias("masspoints")) + .withColumn("breaklines", array(lit("LINESTRING EMPTY"))) + .withColumn("origin", st_geomfromwkt(lit("POINT (0.6 1.8)"))) + .withColumn("xWidth", lit(12)) + .withColumn("yWidth", lit(6)) + .withColumn("xSize", lit(0.1)) + .withColumn("ySize", lit(0.1)) + ) + df.select( + rst_dtmfromgeoms( + "masspoints", "breaklines", lit(0.0), lit(0.01), + "origin", "xWidth", "yWidth", "xSize", "ySize" + ) + ).show(truncate=False) + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |rst_dtmfromgeoms(masspoints, breaklines, 0.0, 0.01, origin, xWidth, yWidth, xSize, ySize) | + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |{NULL, /dbfs/tmp/mosaic/raster/checkpoint/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, {path -> /dbfs/tmp/mosaic/raster/checkpoint/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, last_error -> , all_parents -> , driver -> GTiff, parentPath -> /tmp/mosaic_tmp/mosaic5678582907307109410/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, last_command -> gdal_rasterize ATTRIBUTE=VALUES -of GTiff -co TILED=YES -co COMPRESS=DEFLATE}}| + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. 
code-tab:: scala + + val df = Seq( + Seq( + "POINT Z (2 1 0)", "POINT Z (3 2 1)", + "POINT Z (1 3 3)", "POINT Z (0 2 2)" + ) + ) + .toDF("masspoints") + .withColumn("breaklines", array().cast(ArrayType(StringType))) + .withColumn("origin", st_geomfromwkt(lit("POINT (0.6 1.8)"))) + .withColumn("xWidth", lit(12)) + .withColumn("yWidth", lit(6)) + .withColumn("xSize", lit(0.1)) + .withColumn("ySize", lit(0.1)) + + df.select( + rst_dtmfromgeoms( + $"masspoints", $"breaklines", lit(0.0), lit(0.01), + $"origin", $"xWidth", $"yWidth", $"xSize", $"ySize" + ) + ).show(1, false) + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |rst_dtmfromgeoms(masspoints, breaklines, 0.0, 0.01, origin, xWidth, yWidth, xSize, ySize) | + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |{NULL, /dbfs/tmp/mosaic/raster/checkpoint/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, {path -> /dbfs/tmp/mosaic/raster/checkpoint/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, last_error -> , all_parents -> , driver -> GTiff, parentPath -> /tmp/mosaic_tmp/mosaic5678582907307109410/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, last_command -> gdal_rasterize ATTRIBUTE=VALUES -of GTiff -co TILED=YES -co COMPRESS=DEFLATE}}| + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. 
code-tab:: sql + + SELECT + RST_DTMFROMGEOMS( + ARRAY( + "POINT Z (2 1 0)", + "POINT Z (3 2 1)", + "POINT Z (1 3 3)", + "POINT Z (0 2 2)" + ), + ARRAY("LINESTRING EMPTY"), + DOUBLE(0.0), DOUBLE(0.01), + "POINT (0.6 1.8)", 12, 6, DOUBLE(0.1), DOUBLE(0.1) + ) AS tile + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |rst_dtmfromgeoms(masspoints, breaklines, 0.0, 0.01, origin, xWidth, yWidth, xSize, ySize) | + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |{NULL, /dbfs/tmp/mosaic/raster/checkpoint/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, {path -> /dbfs/tmp/mosaic/raster/checkpoint/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, last_error -> , all_parents -> , driver -> GTiff, parentPath -> /tmp/mosaic_tmp/mosaic5678582907307109410/raster_d4ab419f_9829_4004_99a3_aaa597a69938.GTiff, last_command -> gdal_rasterize ATTRIBUTE=VALUES -of GTiff -co TILED=YES -co COMPRESS=DEFLATE}}| + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. 
code-tab:: r R + + sdf <- createDataFrame( + data.frame( + points = c( + "POINT Z (3 2 1)", "POINT Z (2 1 0)", + "POINT Z (1 3 3)", "POINT Z (0 2 2)" + ) + ) + ) + sdf <- agg(groupBy(sdf), masspoints = collect_list(column("points"))) + sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')")) + sdf <- select(sdf, rst_dtmfromgeoms( + column("masspoints"), column("breaklines"), + lit(0.0), lit(0.01), + lit("POINT (0.6 1.8)"), lit(12L), lit(6L), lit(0.1), lit(0.1) + ) + ) + showDF(sdf, n=1, truncate=F) + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |rst_dtmfromgeoms(masspoints, breaklines, 0.0, 0.01, POINT (0.6 1.8), 12, 6, 0.1, 0.1) | + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + |{NULL, /dbfs/tmp/mosaic/raster/checkpoint/raster_ab03a97f_9bc3_410c_80e1_adf6f75f46e2.GTiff, {path -> /dbfs/tmp/mosaic/raster/checkpoint/raster_ab03a97f_9bc3_410c_80e1_adf6f75f46e2.GTiff, last_error -> , all_parents -> , driver -> GTiff, parentPath -> /tmp/mosaic_tmp/mosaic8840676907961488874/raster_ab03a97f_9bc3_410c_80e1_adf6f75f46e2.GTiff, last_command -> gdal_rasterize ATTRIBUTE=VALUES -of GTiff -co TILED=YES -co COMPRESS=DEFLATE}}| + +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + rst_filter ********** @@ -3008,6 +3172,99 @@ rst_tryopen | true | +------------------------------------------------------------------------------------------------------------------+ + +rst_type +******** + +.. function:: rst_type(tile) + + Returns the data type of the raster's bands. + + :param tile: A column containing the raster tile. + :type tile: Column (RasterTileType) + :rtype: Column: StringType + + :example: + +.. tabs:: + .. code-tab:: py + + df.select(mos.rst_type('tile')).display() + +------------------------------------------------------------------------------------------------------------------+ + | rst_type(tile) | + +------------------------------------------------------------------------------------------------------------------+ + | [Int16] | + +------------------------------------------------------------------------------------------------------------------+ + + .. 
code-tab:: scala + + df.select(rst_type(col("tile"))).show + +------------------------------------------------------------------------------------------------------------------+ + | rst_type(tile) | + +------------------------------------------------------------------------------------------------------------------+ + | [Int16] | + +------------------------------------------------------------------------------------------------------------------+ + + .. code-tab:: sql + + SELECT rst_type(tile) FROM table + +------------------------------------------------------------------------------------------------------------------+ + | rst_type(tile) | + +------------------------------------------------------------------------------------------------------------------+ + | [Int16] | + +------------------------------------------------------------------------------------------------------------------+ + + +rst_updatetype +************** + +.. function:: rst_updatetype(tile, newType) + + Translates the raster to a new data type. + + :param tile: A column containing the raster tile. + :type tile: Column (RasterTileType) + :param newType: Data type to translate the raster to. + :type newType: Column (StringType) + :rtype: Column: (RasterTileType) + + :example: + +.. tabs:: + .. code-tab:: py + + df.select(mos.rst_updatetype('tile', lit('Float32'))).display() + +----------------------------------------------------------------------------------------------------+ + | rst_updatetype(tile,Float32) | + +----------------------------------------------------------------------------------------------------+ + | {"index_id":null,"raster":"SUkqAAg...= (truncated)","metadata":{"path":"... .tif","last_error":"", | + | "all_parents":"no_path","driver":"GTiff","parentPath":"no_path", | + | "last_command":"gdaltranslate -ot Float32 -of GTiff -co TILED=YES -co COMPRESS=DEFLATE"}} | + +----------------------------------------------------------------------------------------------------+ + + .. code-tab:: scala + + df.select(rst_updatetype(col("tile"), lit("Float32"))).show + +----------------------------------------------------------------------------------------------------+ + | rst_updatetype(tile,Float32) | + +----------------------------------------------------------------------------------------------------+ + | {"index_id":null,"raster":"SUkqAAg...= (truncated)","metadata":{"path":"... .tif","last_error":"", | + | "all_parents":"no_path","driver":"GTiff","parentPath":"no_path", | + | "last_command":"gdaltranslate -ot Float32 -of GTiff -co TILED=YES -co COMPRESS=DEFLATE"}} | + +----------------------------------------------------------------------------------------------------+ + + .. code-tab:: sql + + SELECT rst_updatetype(tile, 'Float32') FROM table + +----------------------------------------------------------------------------------------------------+ + | rst_updatetype(tile,Float32) | + +----------------------------------------------------------------------------------------------------+ + | {"index_id":null,"raster":"SUkqAAg...= (truncated)","metadata":{"path":"... 
.tif","last_error":"", | + | "all_parents":"no_path","driver":"GTiff","parentPath":"no_path", | + | "last_command":"gdaltranslate -ot Float32 -of GTiff -co TILED=YES -co COMPRESS=DEFLATE"}} | + +----------------------------------------------------------------------------------------------------+ + + rst_upperleftx ********************** diff --git a/docs/source/api/spatial-functions.rst b/docs/source/api/spatial-functions.rst index 9e2dfbc55..9919bc10b 100644 --- a/docs/source/api/spatial-functions.rst +++ b/docs/source/api/spatial-functions.rst @@ -878,6 +878,180 @@ st_haversine .. note:: Results of this function are always expressed in km, while the input lat/lng pairs are expected to be in degrees. The radius used (in km) is 6371.0088. +st_interpolateelevation +*********************** + +.. function:: st_interpolateelevation(pointsArray, linesArray, mergeTolerance, snapTolerance, origin, xWidth, yWidth, xSize, ySize) + + Compute interpolated elevations across a grid of points described by: + + - :code:`origin`: a point geometry describing the bottom-left corner of the grid, + - :code:`xWidth` and :code:`yWidth`: the number of points in the grid in x and y directions, + - :code:`xSize` and :code:`ySize`: the space between grid points in the x and y directions. + + :note: To generate a grid from a "top-left" :code:`origin`, use a negative value for :code:`ySize`. + + The underlying algorithm first creates a surface mesh by triangulating :code:`pointsArray` + (including :code:`linesArray` as a set of constraint lines) then determines where each point + in the grid would lie on the surface mesh. Finally, it interpolates the + elevation of that point based on the surrounding triangle's vertices. + + As with :code:`st_triangulate`, there are two 'tolerance' parameters for the algorithm: + + - :code:`mergeTolerance` sets the point merging tolerance of the triangulation algorithm, i.e. before the initial + triangulation is performed, nearby points in :code:`pointsArray` can be merged in order to speed up the triangulation + process. A value of zero means all points are considered for triangulation. + - :code:`snapTolerance` sets the tolerance for post-processing the results of the triangulation, i.e. matching + the vertices of the output triangles to input points / lines. This is necessary as the algorithm often returns null + height / Z values. Setting this to a large value may result in the incorrect Z values being assigned to the + output triangle vertices (especially when :code:`linesArray` contains very densely spaced segments). + Setting this value to zero may result in the output triangle vertices being assigned a null Z value. + Both tolerance parameters are expressed in the same units as the projection of the input point geometries. + + This is a generator expression and the resulting DataFrame will contain one row per point of the grid. + + :param pointsArray: Array of geometries respresenting the points to be triangulated + :type pointsArray: Column (ArrayType(Geometry)) + :param linesArray: Array of geometries respresenting the lines to be used as constraints + :type linesArray: Column (ArrayType(Geometry)) + :param mergeTolerance: A tolerance used to coalesce points in close proximity to each other before performing triangulation. + :type mergeTolerance: Column (DoubleType) + :param snapTolerance: A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation. 
+ :type snapTolerance: Column (DoubleType) + :param origin: A point geometry describing the bottom-left corner of the grid. + :type origin: Column (Geometry) + :param xWidth: The number of points in the grid in x direction. + :type xWidth: Column (IntegerType) + :param yWidth: The number of points in the grid in y direction. + :type yWidth: Column (IntegerType) + :param xSize: The spacing between each point on the grid's x-axis. + :type xSize: Column (DoubleType) + :param ySize: The spacing between each point on the grid's y-axis. + :type ySize: Column (DoubleType) + :rtype: Column (Geometry) + + :example: + +.. tabs:: + .. code-tab:: py + + df = ( + spark.createDataFrame( + [ + ["POINT Z (2 1 0)"], + ["POINT Z (3 2 1)"], + ["POINT Z (1 3 3)"], + ["POINT Z (0 2 2)"], + ], + ["wkt"], + ) + .groupBy() + .agg(collect_list("wkt").alias("masspoints")) + .withColumn("breaklines", array(lit("LINESTRING EMPTY"))) + .withColumn("origin", st_geomfromwkt(lit("POINT (0.6 1.8)"))) + .withColumn("xWidth", lit(12)) + .withColumn("yWidth", lit(6)) + .withColumn("xSize", lit(0.1)) + .withColumn("ySize", lit(0.1)) + ) + df.select( + st_interpolateelevation( + "masspoints", "breaklines", lit(0.0), lit(0.01), + "origin", "xWidth", "yWidth", "xSize", "ySize" + ) + ).show(4, truncate=False) + +--------------------------------------------------+ + |geom | + +--------------------------------------------------+ + |POINT Z(1.4 2.1 1.6666666666666665) | + |POINT Z(1.5 2 1.5) | + |POINT Z(1.4 1.9000000000000001 1.4000000000000001)| + |POINT Z(0.9 2 1.7) | + +--------------------------------------------------+ + + .. code-tab:: scala + + val df = Seq( + Seq( + "POINT Z (2 1 0)", "POINT Z (3 2 1)", + "POINT Z (1 3 3)", "POINT Z (0 2 2)" + ) + ) + .toDF("masspoints") + .withColumn("breaklines", array().cast(ArrayType(StringType))) + .withColumn("origin", st_geomfromwkt(lit("POINT (0.6 1.8)"))) + .withColumn("xWidth", lit(12)) + .withColumn("yWidth", lit(6)) + .withColumn("xSize", lit(0.1)) + .withColumn("ySize", lit(0.1)) + + df.select( + st_interpolateelevation( + $"masspoints", $"breaklines", lit(0.0), lit(0.01), + $"origin", $"xWidth", $"yWidth", $"xSize", $"ySize" + ) + ).show(4, false) + +--------------------------------------------------+ + |geom | + +--------------------------------------------------+ + |POINT Z(1.4 2.1 1.6666666666666665) | + |POINT Z(1.5 2 1.5) | + |POINT Z(1.4 1.9000000000000001 1.4000000000000001)| + |POINT Z(0.9 2 1.7) | + +--------------------------------------------------+ + + .. code-tab:: sql + + SELECT + ST_INTERPOLATEELEVATION( + ARRAY( + "POINT Z (2 1 0)", + "POINT Z (3 2 1)", + "POINT Z (1 3 3)", + "POINT Z (0 2 2)" + ), + ARRAY("LINESTRING EMPTY"), + DOUBLE(0.0), DOUBLE(0.01), + "POINT (0.6 1.8)", 12, 6, DOUBLE(0.1), DOUBLE(0.1) + ) + +--------------------------------------------------+ + |geom | + +--------------------------------------------------+ + |POINT Z(1.4 2.1 1.6666666666666665) | + |POINT Z(1.5 2 1.5) | + |POINT Z(1.4 1.9000000000000001 1.4000000000000001)| + |POINT Z(0.9 2 1.7) | + +--------------------------------------------------+ + + .. 
code-tab:: r R
+
+        sdf <- createDataFrame(
+            data.frame(
+                points = c(
+                    "POINT Z (3 2 1)", "POINT Z (2 1 0)",
+                    "POINT Z (1 3 3)", "POINT Z (0 2 2)"
+                )
+            )
+        )
+        sdf <- agg(groupBy(sdf), masspoints = collect_list(column("points")))
+        sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')"))
+        sdf <- select(sdf, st_interpolateelevation(
+            column("masspoints"), column("breaklines"),
+            lit(0.0), lit(0.01),
+            lit("POINT (0.6 1.8)"), lit(12L), lit(6L), lit(0.1), lit(0.1)
+            )
+        )
+        showDF(sdf, n=4, truncate=F)
+        +--------------------------------------------------+
+        |geom                                              |
+        +--------------------------------------------------+
+        |POINT Z(1.4 2.1 1.6666666666666665)               |
+        |POINT Z(1.5 2 1.5)                                |
+        |POINT Z(1.4 1.9000000000000001 1.4000000000000001)|
+        |POINT Z(0.9 2 1.7)                                |
+        +--------------------------------------------------+
+
+
 st_intersection
 ***************
 
@@ -1580,6 +1754,134 @@ st_transform
 by specifying the :code:`srcSRID` and :code:`dstSRID`.
 
+st_triangulate
+**************
+
+.. function:: st_triangulate(pointsArray, linesArray, mergeTolerance, snapTolerance)
+
+    Performs a conforming Delaunay triangulation using the points in :code:`pointsArray` including :code:`linesArray` as constraint / break lines.
+
+    There are two 'tolerance' parameters for the algorithm.
+
+    - :code:`mergeTolerance` sets the point merging tolerance of the triangulation algorithm, i.e. before the initial
+      triangulation is performed, nearby points in :code:`pointsArray` can be merged in order to speed up the triangulation
+      process. A value of zero means all points are considered for triangulation.
+    - :code:`snapTolerance` sets the tolerance for post-processing the results of the triangulation, i.e. matching
+      the vertices of the output triangles to input points / lines. This is necessary as the algorithm often returns null
+      height / Z values. Setting this to a large value may result in the incorrect Z values being assigned to the
+      output triangle vertices (especially when :code:`linesArray` contains very densely spaced segments).
+      Setting this value to zero may result in the output triangle vertices being assigned a null Z value.
+      Both tolerance parameters are expressed in the same units as the projection of the input point geometries.
+
+    This is a generator expression and the resulting DataFrame will contain one row per triangle returned by the algorithm.
+
+    :param pointsArray: Array of geometries representing the points to be triangulated
+    :type pointsArray: Column (ArrayType(Geometry))
+    :param linesArray: Array of geometries representing the lines to be used as constraints
+    :type linesArray: Column (ArrayType(Geometry))
+    :param mergeTolerance: A tolerance used to coalesce points in close proximity to each other before performing triangulation.
+    :type mergeTolerance: Column (DoubleType)
+    :param snapTolerance: A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation.
+    :type snapTolerance: Column (DoubleType)
+    :rtype: Column (Geometry)
+
+    :example:
+
+.. tabs::
+    .. 
code-tab:: py
+
+        df = (
+            spark.createDataFrame(
+                [
+                    ["POINT Z (2 1 0)"],
+                    ["POINT Z (3 2 1)"],
+                    ["POINT Z (1 3 3)"],
+                    ["POINT Z (0 2 2)"],
+                ],
+                ["wkt"],
+            )
+            .groupBy()
+            .agg(collect_list("wkt").alias("masspoints"))
+            .withColumn("breaklines", array(lit("LINESTRING EMPTY")))
+            .withColumn("triangles", st_triangulate("masspoints", "breaklines", lit(0.0), lit(0.01)))
+        )
+        df.show(2, False)
+        +---------------------------------------+
+        |triangles                              |
+        +---------------------------------------+
+        |POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))|
+        |POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))|
+        +---------------------------------------+
+
+    .. code-tab:: scala
+
+        val df = Seq(
+            Seq(
+                "POINT Z (2 1 0)", "POINT Z (3 2 1)",
+                "POINT Z (1 3 3)", "POINT Z (0 2 2)"
+            )
+        )
+        .toDF("masspoints")
+        .withColumn("breaklines", array().cast(ArrayType(StringType)))
+        .withColumn("triangles",
+            st_triangulate(
+                $"masspoints", $"breaklines",
+                lit(0.0), lit(0.01)
+            )
+        )
+
+        df.select(st_astext($"triangles")).show(2, false)
+        +------------------------------+
+        |st_astext(triangles)          |
+        +------------------------------+
+        |POLYGON ((0 2, 2 1, 1 3, 0 2))|
+        |POLYGON ((1 3, 2 1, 3 2, 1 3))|
+        +------------------------------+
+
+    .. code-tab:: sql
+
+        SELECT
+            ST_TRIANGULATE(
+                ARRAY(
+                    "POINT Z (2 1 0)",
+                    "POINT Z (3 2 1)",
+                    "POINT Z (1 3 3)",
+                    "POINT Z (0 2 2)"
+                ),
+                ARRAY("LINESTRING EMPTY"),
+                DOUBLE(0.0), DOUBLE(0.01)
+            )
+        +---------------------------------------+
+        |triangles                              |
+        +---------------------------------------+
+        |POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))|
+        |POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))|
+        +---------------------------------------+
+
+    .. code-tab:: r R
+
+        sdf <- createDataFrame(
+            data.frame(
+                points = c(
+                    "POINT Z (3 2 1)", "POINT Z (2 1 0)",
+                    "POINT Z (1 3 3)", "POINT Z (0 2 2)"
+                )
+            )
+        )
+        sdf <- agg(groupBy(sdf), masspoints = collect_list(column("points")))
+        sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')"))
+        result <- select(sdf, st_triangulate(
+            column("masspoints"), column("breaklines"),
+            lit(0.0), lit(0.01))
+        )
+        showDF(result, truncate=F)
+        +---------------------------------------+
+        |triangles                              |
+        +---------------------------------------+
+        |POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))|
+        |POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))|
+        +---------------------------------------+
+
 st_translate
 ************
 
diff --git a/docs/source/api/spatial-indexing.rst b/docs/source/api/spatial-indexing.rst
index 4521dd38c..eb95a77b4 100644
--- a/docs/source/api/spatial-indexing.rst
+++ b/docs/source/api/spatial-indexing.rst
@@ -850,7 +850,7 @@ grid_cellkringexplode
 
 grid_cell_intersection
-**************
+**********************
 
 .. function:: grid_cell_intersection(left_chip, right_chip)
 
@@ -906,7 +906,7 @@ grid_cell_intersection
    +--------------------------------------------------------+
 
 grid_cell_union
-**************
+***************
 
 .. function:: grid_cell_union(left_chip, right_chip)
 
diff --git a/docs/source/api/vector-format-readers.rst b/docs/source/api/vector-format-readers.rst
index f6821427f..8df63fb1a 100644
--- a/docs/source/api/vector-format-readers.rst
+++ b/docs/source/api/vector-format-readers.rst
@@ -2,9 +2,9 @@
 Vector Format Readers
 =====================
 
-
+#####
 Intro
-################
+#####
 Mosaic provides spark readers for vector files supported by GDAL OGR drivers.
 Only the drivers that are built by default are supported.
Here are some common useful file formats: @@ -35,7 +35,7 @@ Additionally, for convenience, Mosaic provides specific readers for Shapefile an * :code:`spark.read.format("shapefile")` reader for Shapefiles natively in Spark. spark.read.format("ogr") -************************* +************************ A base Spark SQL data source for reading GDAL vector data sources. The output of the reader is a DataFrame with inferred schema. The schema is inferred from both features and fields in the vector file. @@ -55,7 +55,8 @@ The reader supports the following options: * layerNumber - number of the layer to read (IntegerType), zero-indexed -.. function:: spark.read.format("ogr").load(path) +.. function:: load(path) + :module: spark.read.format("ogr") Loads a vector file and returns the result as a :class:`DataFrame`. @@ -128,7 +129,8 @@ and parsed into expected types on execution. The reader supports the following o * layerNumber - number of the layer to read (IntegerType), zero-indexed [pass as String] -.. function:: mos.read().format("multi_read_ogr").load(path) +.. function:: load(path) + :module: mos.read().format("multi_read_ogr") Loads a vector file and returns the result as a :class:`DataFrame`. @@ -175,7 +177,7 @@ and parsed into expected types on execution. The reader supports the following o spark.read.format("geo_db") -***************************** +*************************** Mosaic provides a reader for GeoDB files natively in Spark. The output of the reader is a DataFrame with inferred schema. Only 1 file per task is read. For parallel reading of large files use the multi_read_ogr reader. @@ -186,7 +188,8 @@ The reader supports the following options: * layerNumber - number of the layer to read (IntegerType), zero-indexed * vsizip - if the vector files are zipped files, set this to true (BooleanType) -.. function:: spark.read.format("geo_db").load(path) +.. function:: load(path) + :module: spark.read.format("geo_db") Loads a GeoDB file and returns the result as a :class:`DataFrame`. @@ -234,7 +237,7 @@ The reader supports the following options: spark.read.format("shapefile") -******************************** +****************************** Mosaic provides a reader for Shapefiles natively in Spark. The output of the reader is a DataFrame with inferred schema. Only 1 file per task is read. For parallel reading of large files use the multi_read_ogr reader. @@ -245,7 +248,8 @@ The reader supports the following options: * layerNumber - number of the layer to read (IntegerType), zero-indexed * vsizip - if the vector files are zipped files, set this to true (BooleanType) -.. function:: spark.read.format("shapefile").load(path) +.. function:: load(path) + :module: spark.read.format("shapefile") Loads a Shapefile and returns the result as a :class:`DataFrame`. @@ -291,6 +295,7 @@ The reader supports the following options: These must be supplied as a :code:`String`. Also, you can supply function signature values as :code:`String`. +################ Vector File UDFs ################ diff --git a/docs/source/conf.py b/docs/source/conf.py index e81dd3385..e01d5e4d0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,6 +52,7 @@ napoleon_use_admonition_for_notes = True sphinx_tabs_disable_tab_closing = True todo_include_todos = True +suppress_warnings = ["autosectionlabel.*"] # -- Options for HTML output ------------------------------------------------- @@ -64,27 +65,27 @@ html_theme_options = { # Set the name of the project to appear in the navigation. 
- 'nav_title': f'Mosaic {release}', + # 'nav_title': f'Mosaic {release}', # Specify a base_url used to generate sitemap.xml. If not # specified, then no sitemap will be built. # 'base_url': 'https://project.github.io/project', # Set the color and the accent color - 'color_primary': 'green', - 'color_accent': 'green', + # 'color_primary': 'green', + # 'color_accent': 'green', # Set the repo location to get a badge with stats - 'repo_url': 'https://github.com/databrickslabs/mosaic/', - 'repo_name': 'Mosaic', + # 'repo_url': 'https://github.com/databrickslabs/mosaic/', + # 'repo_name': 'Mosaic', - 'globaltoc_depth': 3, + # 'globaltoc_depth': 3, 'globaltoc_collapse': False, 'globaltoc_includehidden': True, - 'heroes': {'index': 'Simple, scalable geospatial analytics on Databricks', - 'examples/index': 'examples and tutorials to get started with ' - 'Mosaic'}, - "version_dropdown": True, + # 'heroes': {'index': 'Simple, scalable geospatial analytics on Databricks', + # 'examples/index': 'examples and tutorials to get started with ' + # 'Mosaic'}, + # "version_dropdown": True, # "version_json": "../versions-v2.json", } diff --git a/docs/source/usage/automatic-sql-registration.rst b/docs/source/usage/automatic-sql-registration.rst index 56cd1b219..48c19dff7 100644 --- a/docs/source/usage/automatic-sql-registration.rst +++ b/docs/source/usage/automatic-sql-registration.rst @@ -12,7 +12,7 @@ with a geospatial middleware component such as [Geoserver](https://geoserver.org .. warning:: Mosaic 0.4.x SQL bindings for DBR 13 can register with Assigned clusters (as Spark Expressions), but not Shared Access due - to `Unity Catalog `_ API changes, more `here `_. + to `Unity Catalog `__ API changes, more `here `__. Pre-requisites ************** @@ -20,13 +20,13 @@ Pre-requisites In order to use Mosaic, you must have access to a Databricks cluster running Databricks Runtime 13. If you have cluster creation permissions in your Databricks workspace, you can create a cluster using the instructions -`here `_. +`here `__. You will also need "Can Manage" permissions on this cluster in order to attach init script to your cluster. A workspace administrator will be able to grant these permissions and more information about cluster permissions can be found in our documentation -`here `_. +`here `__. Installation ************ @@ -59,9 +59,9 @@ To install Mosaic on your Databricks cluster, take the following steps: EOF -#. Configure the init script for the cluster following the instructions `here `_. +#. Configure the init script for the cluster following the instructions `here `__. -#. Add the following spark configuration values for your cluster following the instructions `here `_. +#. Add the following spark configuration values for your cluster following the instructions `here `__. .. code-block:: bash diff --git a/docs/source/usage/install-gdal.rst b/docs/source/usage/install-gdal.rst index 12d1217d0..bd71d20dc 100644 --- a/docs/source/usage/install-gdal.rst +++ b/docs/source/usage/install-gdal.rst @@ -8,17 +8,17 @@ In order to use Mosaic 0.4 series, you must have access to a Databricks cluster Databricks Runtime 13.3 LTS. If you have cluster creation permissions in your Databricks workspace, you can create a cluster using the instructions -`here `_. +`here `__. You will also need "Can Manage" permissions on this cluster in order to attach the Mosaic library to your cluster. 
A workspace administrator will be able to grant these permissions and more information about cluster permissions can be found in our documentation -`here `_. +`here `__. .. warning:: These instructions assume an Assigned cluster is being used (vs a Shared Access cluster), - more on access modes `here `_. + more on access modes `here `__. GDAL Installation #################### @@ -131,7 +131,7 @@ GDAL is configured as follows in `MosaicGDAL " * - GDAL_PAM_PROXY_DIR diff --git a/docs/source/usage/installation.rst b/docs/source/usage/installation.rst index cdeeba4d0..777a471a3 100644 --- a/docs/source/usage/installation.rst +++ b/docs/source/usage/installation.rst @@ -16,49 +16,49 @@ Mosaic 0.4.x series only supports DBR 13.x DBRs. If running on a different DBR i DEPRECATION ERROR: Mosaic v0.4.x series only supports Databricks Runtime 13. You can specify :code:`%pip install 'databricks-mosaic<0.4,>=0.3'` for DBR < 13. -Mosaic 0.4.x series issues an ERROR on standard, non-Photon clusters `ADB `_ | -`AWS `_ | -`GCP `_: +Mosaic 0.4.x series issues an ERROR on standard, non-Photon clusters `ADB `__ | +`AWS `__ | +`GCP `__: DEPRECATION ERROR: Please use a Databricks Photon-enabled Runtime for performance benefits or Runtime ML for spatial AI benefits; Mosaic 0.4.x series restricts executing this cluster. As of Mosaic 0.4.0 / DBR 13.3 LTS (subject to change in follow-on releases): -* `Assigned Clusters `_ +* `Assigned Clusters `__ * Mosaic Python, SQL, R, and Scala APIs. -* `Shared Access Clusters `_ - * Mosaic Scala API (JVM) with Admin `allowlisting `_. +* `Shared Access Clusters `__ + * Mosaic Scala API (JVM) with Admin `allowlisting `__. * Mosaic Python bindings (to Mosaic Scala APIs) are blocked by Py4J Security on Shared Access Clusters. - * Mosaic SQL expressions cannot yet be registered due to `Unity Catalog `_. - API changes, more `here `_. + * Mosaic SQL expressions cannot yet be registered due to `Unity Catalog `__. + API changes, more `here `__. .. note:: Mosaic is a custom JVM library that extends spark, which has the following implications in DBR 13.3 LTS: - * `Unity Catalog `_ enforces process isolation which is difficult + * `Unity Catalog `__ enforces process isolation which is difficult to accomplish with custom JVM libraries; as such only built-in (aka platform provided) JVM APIs can be invoked from other supported languages in Shared Access Clusters. - * Clusters can read `Volumes `_ via relevant + * Clusters can read `Volumes `__ via relevant built-in (aka platform provided) readers and writers or via custom python calls which do not involve any custom JVM code. If you have cluster creation permissions in your Databricks workspace, you can create a cluster using the instructions -`here `_. +`here `__. You will also need "Can Manage" permissions on this cluster in order to attach the Mosaic library to your cluster. A workspace administrator will be able to grant these permissions and more information about cluster permissions can be found in our documentation -`here `_. +`here `__. Package installation #################### Installation from PyPI ********************** -Python users can install the library directly from `PyPI `_ -using the instructions `here `_ +Python users can install the library directly from `PyPI `__ +using the instructions `here `__ or from within a Databricks notebook using the :code:`%pip` magic command, e.g. .. code-block:: bash @@ -72,11 +72,11 @@ if you need to install Mosaic 0.3 series for DBR 12.2 LTS, e.g. 
%pip install "databricks-mosaic<0.4,>=0.3" -For Mosaic versions < 0.4 please use the `0.3 docs `_. +For Mosaic versions < 0.4 please use the `0.3 docs `__. Installation from release artifacts *********************************** -Alternatively, you can access the latest release artifacts `here `_ +Alternatively, you can access the latest release artifacts `here `__ and manually attach the appropriate library to your cluster. Which artifact you choose to attach will depend on the language API you intend to use. @@ -85,13 +85,13 @@ Which artifact you choose to attach will depend on the language API you intend t * For Scala users, take the Scala JAR (packaged with all necessary dependencies). * For R users, download the Scala JAR and the R bindings library [see the sparkR readme](R/sparkR-mosaic/README.md). -Instructions for how to attach libraries to a Databricks cluster can be found `here `_. +Instructions for how to attach libraries to a Databricks cluster can be found `here `__. Automated SQL registration ************************** If you would like to use Mosaic's functions in pure SQL (in a SQL notebook, from a business intelligence tool, or via a middleware layer such as Geoserver, perhaps) then you can configure -"Automatic SQL Registration" using the instructions `here `_. +"Automatic SQL Registration" using the instructions `here `__. Enabling the Mosaic functions ############################# @@ -184,4 +184,4 @@ register the Mosaic SQL functions in your SparkSession from a Scala notebook cel .. warning:: Mosaic 0.4.x SQL bindings for DBR 13 can register with Assigned clusters (as Spark Expressions), but not Shared Access due - to `Unity Catalog `_ API changes, more `here `_. + to `Unity Catalog `__ API changes, more `here `__. diff --git a/pom.xml b/pom.xml index b50ef2be4..ba0c1b94f 100644 --- a/pom.xml +++ b/pom.xml @@ -264,7 +264,7 @@ org.scoverage scoverage-maven-plugin - 2.0.2 + 1.4.11 scoverage-report diff --git a/python/mosaic/__init__.py b/python/mosaic/__init__.py index a8cd7e73a..da1c05eb5 100644 --- a/python/mosaic/__init__.py +++ b/python/mosaic/__init__.py @@ -1,7 +1,7 @@ from .api import * -from .core import MosaicLibraryHandler, MosaicContext -from .utils.display_handler import displayMosaic +from .core import MosaicContext, MosaicLibraryHandler from .models import SpatialKNN from .readers import read +from .utils.display_handler import displayMosaic __version__ = "0.4.3" diff --git a/python/mosaic/api/__init__.py b/python/mosaic/api/__init__.py index 9c8ce7a81..101989118 100644 --- a/python/mosaic/api/__init__.py +++ b/python/mosaic/api/__init__.py @@ -1,9 +1,9 @@ from .accessors import * from .aggregators import * from .constructors import * -from .enable import enable_mosaic, get_install_version, get_install_lib_dir +from .enable import enable_mosaic, get_install_lib_dir, get_install_version from .functions import * from .fuse import * -from .predicates import * from .gdal import * +from .predicates import * from .raster import * diff --git a/python/mosaic/api/aggregators.py b/python/mosaic/api/aggregators.py index 87eaed84b..ba638f139 100644 --- a/python/mosaic/api/aggregators.py +++ b/python/mosaic/api/aggregators.py @@ -66,11 +66,13 @@ def st_asgeojsontile_agg(geom: ColumnOrName, attributes: ColumnOrName) -> Column return config.mosaic_context.invoke_function( "st_asgeojsontile_agg", pyspark_to_java_column(geom), - pyspark_to_java_column(attributes) + pyspark_to_java_column(attributes), ) -def st_asmvttile_agg(geom: ColumnOrName, attributes: ColumnOrName, zxyID: 
ColumnOrName) -> Column: +def st_asmvttile_agg( + geom: ColumnOrName, attributes: ColumnOrName, zxyID: ColumnOrName +) -> Column: """ Returns the aggregated MVT tile. @@ -92,7 +94,7 @@ def st_asmvttile_agg(geom: ColumnOrName, attributes: ColumnOrName, zxyID: Column "st_asmvttile_agg", pyspark_to_java_column(geom), pyspark_to_java_column(attributes), - pyspark_to_java_column(zxyID) + pyspark_to_java_column(zxyID), ) diff --git a/python/mosaic/api/enable.py b/python/mosaic/api/enable.py index e7e66f974..6a24c2b58 100644 --- a/python/mosaic/api/enable.py +++ b/python/mosaic/api/enable.py @@ -74,14 +74,14 @@ def enable_mosaic( if not jar_autoattach: spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") print("...set 'spark.databricks.labs.mosaic.jar.autoattach' to false") - config.jar_autoattach=False + config.jar_autoattach = False if jar_path is not None: spark.conf.set("spark.databricks.labs.mosaic.jar.path", jar_path) print(f"...set 'spark.databricks.labs.mosaic.jar.path' to '{jar_path}'") - config.jar_path=jar_path + config.jar_path = jar_path if log_info: spark.sparkContext.setLogLevel("info") - config.log_info=True + config.log_info = True # Config global objects # - add MosaicContext after MosaicLibraryHandler diff --git a/python/mosaic/api/functions.py b/python/mosaic/api/functions.py index 7db323e56..e02458f41 100644 --- a/python/mosaic/api/functions.py +++ b/python/mosaic/api/functions.py @@ -12,70 +12,72 @@ ##################### __all__ = [ + "flatten_polygons", + "grid_boundary", + "grid_boundaryaswkb", + "grid_cell_intersection", + "grid_cell_union", + "grid_cellarea", + "grid_cellkloop", + "grid_cellkloopexplode", + "grid_cellkring", + "grid_cellkringexplode", + "grid_geometrykloop", + "grid_geometrykloopexplode", + "grid_geometrykring", + "grid_geometrykringexplode", + "grid_longlatascellid", + "grid_pointascellid", + "grid_polyfill", + "grid_tessellate", + "grid_tessellateexplode", + "index_geometry", + "mosaic_explode", + "mosaicfill", + "point_index_geom", + "point_index_lonlat", + "polyfill", "st_area", - "st_length", - "st_perimeter", - "st_convexhull", - "st_concavehull", "st_buffer", "st_bufferloop", + "st_centroid", + "st_centroid2D", + "st_concavehull", + "st_convexhull", + "st_difference", "st_dimension", + "st_distance", "st_dump", "st_envelope", - "st_srid", - "st_setsrid", - "st_transform", + "st_geometrytype", "st_hasvalidcoordinates", - "st_translate", - "st_scale", - "st_rotate", - "st_centroid", - "st_centroid2D", - "st_numpoints", - "st_isvalid", - "st_distance", "st_haversine", + "st_interpolateelevation", "st_intersection", - "st_difference", + "st_isvalid", + "st_length", + "st_numpoints", + "st_perimeter", + "st_rotate", + "st_scale", + "st_setsrid", "st_simplify", - "st_union", + "st_srid", + "st_transform", + "st_translate", + "st_triangulate", "st_unaryunion", + "st_union", "st_updatesrid", - "st_geometrytype", - "st_xmin", - "st_xmax", - "st_ymin", - "st_ymax", - "st_zmin", - "st_zmax", "st_x", + "st_xmax", + "st_xmin", "st_y", + "st_ymax", + "st_ymin", "st_z", - "flatten_polygons", - "grid_boundaryaswkb", - "grid_boundary", - "grid_longlatascellid", - "grid_pointascellid", - "grid_polyfill", - "grid_tessellate", - "grid_tessellateexplode", - "grid_cellarea", - "grid_cell_intersection", - "grid_cell_union", - "grid_cellkring", - "grid_cellkloop", - "grid_cellkringexplode", - "grid_cellkloopexplode", - "grid_geometrykring", - "grid_geometrykloop", - "grid_geometrykringexplode", - "grid_geometrykloopexplode", - "point_index_geom", - 
"point_index_lonlat", - "index_geometry", - "polyfill", - "mosaic_explode", - "mosaicfill", + "st_zmax", + "st_zmin", ] @@ -198,9 +200,9 @@ def st_concavehull( def st_buffer( - geom: ColumnOrName, - radius: ColumnOrName, - buffer_style_parameters: Any = "", + geom: ColumnOrName, + radius: ColumnOrName, + buffer_style_parameters: Any = "", ) -> Column: """ Compute the buffered geometry based on geom and radius. @@ -615,6 +617,109 @@ def st_haversine( ) +def st_triangulate( + points_array: ColumnOrName, + lines_array: ColumnOrName, + merge_tolerance: ColumnOrName, + snap_tolerance: ColumnOrName, +) -> Column: + """ + Generate a triangulated surface mesh from the set of points `points_array` and constraint lines `lines_array`. + + Returns one row per triangle in the mesh. + + Notes: + - `lines_array` can be an empty array if no constraints are needed. + + Parameters + ---------- + points_array : Column + An array of mass points including Z-values. + lines_array : Column + An array of lines that are used as constraints during the triangulation process. + merge_tolerance : Column + A tolerance used to coalesce points in close proximity to each other before performing triangulation. + snap_tolerance : Column + A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation. + + Returns + ------- + Column (DoubleType) + """ + return config.mosaic_context.invoke_function( + "st_triangulate", + pyspark_to_java_column(points_array), + pyspark_to_java_column(lines_array), + pyspark_to_java_column(merge_tolerance), + pyspark_to_java_column(snap_tolerance), + ) + + +def st_interpolateelevation( + points_array: ColumnOrName, + lines_array: ColumnOrName, + merge_tolerance: ColumnOrName, + snap_tolerance: ColumnOrName, + origin: ColumnOrName, + x_width: ColumnOrName, + y_width: ColumnOrName, + x_size: ColumnOrName, + y_size: ColumnOrName, +) -> Column: + """ + Compute interpolated elevations across a grid of points described by + `origin`, `x_width`, `y_width`, `x_size`, and `y_size`. + + The underlying algorithm first creates a surface mesh by triangulating `points_array` + (including `lines_array` as a set of constraint lines) then determines where each point + in the grid would lie on the surface mesh. Finally, it interpolates the + elevation of that point based on the surrounding triangle's vertices. + + Notes: + - Uses (x, y) _not_ (i, j) order to generate the grid (i.e. `origin` is assumed to be the bottom-left corner). + To generate a grid from a top-left `origin`, use a negative value for `y_size`. + + Parameters + ---------- + points_array : Column + An array of mass points including Z-values. + lines_array : Column + An array of lines that are used as constraints during the triangulation process. + merge_tolerance : Column + A tolerance used to coalesce points in close proximity to each other before performing triangulation. + snap_tolerance : Column + A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation. + origin : Column + The bottom-left corner of the grid. Use a negative value for `y_size` if you wish to supply a top-left origin. 
+ x_width : Column + The number of points on the grid's x-axis. + y_width : Column + The number of points on the grid's y-axis. + x_size : Column + The spacing between each point on the grid's x-axis + (in meters or degrees depending on the projection of `points_array`). + y_size : Column + The spacing between each point on the grid's y-axis + (in meters or degrees depending on the projection of `points_array`). + + Returns + ------- + Column (StringType) + The interpolated grid points with their Z values, one per row. + """ + return config.mosaic_context.invoke_function( + "st_interpolateelevation", + pyspark_to_java_column(points_array), + pyspark_to_java_column(lines_array), + pyspark_to_java_column(merge_tolerance), + pyspark_to_java_column(snap_tolerance), + pyspark_to_java_column(origin), + pyspark_to_java_column(x_width), + pyspark_to_java_column(y_width), + pyspark_to_java_column(x_size), + pyspark_to_java_column(y_size), + ) + + def st_difference(geom1: ColumnOrName, geom2: ColumnOrName) -> Column: """ Compute the difference between `geom1` and `geom2`. diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 607e311db..0443ba37f 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -1,7 +1,7 @@ +import os from dataclasses import dataclass from pathlib import Path -import os import requests __all__ = ["SetupMgr", "setup_fuse_install"] @@ -12,6 +12,7 @@ class SetupMgr: """ Defaults mirror setup_gdal. """ + to_fuse_dir: str script_in_name: str = "mosaic-gdal-init.sh" script_out_name: str = "mosaic-gdal-init.sh" @@ -29,7 +30,9 @@ def configure(self, test_mode: bool = False) -> bool: # - start with the un-configured script (from repo) # this is using a different (repo) folder in 0.4.2+ (to allow prior versions to work) - GITHUB_CONTENT_TAG_URL = "https://raw.githubusercontent.com/databrickslabs/mosaic/main" + GITHUB_CONTENT_TAG_URL = ( + "https://raw.githubusercontent.com/databrickslabs/mosaic/main" + ) script_url = f"{GITHUB_CONTENT_TAG_URL}/scripts/0.4.2/{self.script_in_name}" script = None root_path = None @@ -41,8 +44,8 @@ def configure(self, test_mode: bool = False) -> bool: # - up 4 parents [0..3] # - api [0] -> mosaic [1] -> python [2] -> mosaic [3] root_path = Path(__file__).parents[3] - script_path = root_path / 'scripts' / '0.4.2' / self.script_in_name - script = script_path.read_text(encoding='utf-8') + script_path = root_path / "scripts" / "0.4.2" / self.script_in_name + script = script_path.read_text(encoding="utf-8") # - tokens used in script SCRIPT_FUSE_DIR_TOKEN = "FUSE_DIR='__FUSE_DIR__'" # <- ' added @@ -63,7 +66,7 @@ def configure(self, test_mode: bool = False) -> bool: # - write the configured init script script_out_path = Path(self.to_fuse_dir) / self.script_out_name - script_out_path.write_text(script, encoding='utf-8') + script_out_path.write_text(script, encoding="utf-8") # --- end of script config --- @@ -89,7 +92,9 @@ def configure(self, test_mode: bool = False) -> bool: ) resource_version = latest.split("/tag/v_")[1].split('"')[0] # download jar - jar_filename = f"mosaic-{resource_version}-jar-with-dependencies.jar" + jar_filename = ( + f"mosaic-{resource_version}-jar-with-dependencies.jar" + ) jar_path = f"{self.to_fuse_dir}/{jar_filename}" with requests.Session() as s: r = s.get( @@ -102,14 +107,16 @@ def configure(self, test_mode: bool = False) -> bool: resource_statuses[jar_filename] = r.status_code else: # test_mode (use local resources) - lib_path = root_path / 'python' / 'mosaic' / 'lib' + lib_path = root_path / "python" / "mosaic" / "lib" src_jar_path = None for p in
lib_path.iterdir(): - if p.name.startswith('mosaic-') and p.name.endswith('-jar-with-dependencies.jar'): + if p.name.startswith("mosaic-") and p.name.endswith( + "-jar-with-dependencies.jar" + ): src_jar_path = p break if src_jar_path: - dst_jar_path = Path(f'{self.to_fuse_dir}/{src_jar_path.name}') + dst_jar_path = Path(f"{self.to_fuse_dir}/{src_jar_path.name}") dst_jar_path.write_bytes(src_jar_path.read_bytes()) # - handle so copy @@ -117,7 +124,7 @@ def configure(self, test_mode: bool = False) -> bool: so_names = [ "libgdalalljni.so", "libgdalalljni.so.30", - "libgdalalljni.so.30.0.3" + "libgdalalljni.so.30.0.3", ] if not test_mode: with requests.Session() as s: @@ -133,10 +140,10 @@ def configure(self, test_mode: bool = False) -> bool: resource_statuses[so_filename] = r.status_code else: # test_mode (use local resources) - resources_path = root_path / 'resources' / 'gdal' / 'jammy' + resources_path = root_path / "resources" / "gdal" / "jammy" for so_filename in so_names: src_so_path = resources_path / so_filename - dst_so_path = Path(f'{self.to_fuse_dir}/{so_filename}') + dst_so_path = Path(f"{self.to_fuse_dir}/{so_filename}") dst_so_path.write_bytes(src_so_path.read_bytes()) # - echo status @@ -162,7 +169,7 @@ def configure(self, test_mode: bool = False) -> bool: print("\n") if not any(resource_statuses) or all( - value == 200 for value in resource_statuses.values() + value == 200 for value in resource_statuses.values() ): return True else: @@ -170,11 +177,11 @@ def configure(self, test_mode: bool = False) -> bool: def setup_fuse_install( - to_fuse_dir: str, - script_out_name: str = "mosaic-fuse-init.sh", - jar_copy: bool = True, - jni_so_copy: bool = True, - test_mode: bool = False + to_fuse_dir: str, + script_out_name: str = "mosaic-fuse-init.sh", + jar_copy: bool = True, + jni_so_copy: bool = True, + test_mode: bool = False, ) -> bool: """ [1] if `jar_copy=True` diff --git a/python/mosaic/api/gdal.py b/python/mosaic/api/gdal.py index d7e88a47e..9ef06c0b4 100644 --- a/python/mosaic/api/gdal.py +++ b/python/mosaic/api/gdal.py @@ -1,23 +1,31 @@ -from .enable import refresh_context -from .fuse import SetupMgr -from mosaic.config import config +import subprocess + from pyspark.sql import SparkSession -import subprocess +from mosaic.config import config + +from .enable import refresh_context +from .fuse import SetupMgr __all__ = [ - "setup_gdal", "enable_gdal", - "update_checkpoint_path", "set_checkpoint_on", "set_checkpoint_off", - "has_context", "is_use_checkpoint", "get_checkpoint_path", "reset_checkpoint", - "get_checkpoint_path_default" + "setup_gdal", + "enable_gdal", + "update_checkpoint_path", + "set_checkpoint_on", + "set_checkpoint_off", + "has_context", + "is_use_checkpoint", + "get_checkpoint_path", + "reset_checkpoint", + "get_checkpoint_path_default", ] def setup_gdal( - to_fuse_dir: str = "/Workspace/Shared/geospatial/mosaic/gdal/jammy/0.4.2", - script_out_name: str = "mosaic-gdal-init.sh", - jni_so_copy: bool = False, - test_mode: bool = False + to_fuse_dir: str = "/Workspace/Shared/geospatial/mosaic/gdal/jammy/0.4.2", + script_out_name: str = "mosaic-gdal-init.sh", + jni_so_copy: bool = False, + test_mode: bool = False, ) -> bool: """ Prepare GDAL init script and shared objects required for GDAL to run on spark. @@ -50,9 +58,7 @@ def setup_gdal( True unless resources fail to download. 
""" setup_mgr = SetupMgr( - to_fuse_dir, - script_out_name=script_out_name, - jni_so_copy=jni_so_copy + to_fuse_dir, script_out_name=script_out_name, jni_so_copy=jni_so_copy ) return setup_mgr.configure(test_mode=test_mode) @@ -76,15 +82,21 @@ def enable_gdal(spark: SparkSession, with_checkpoint_path: str = None) -> None: try: if with_checkpoint_path is not None: spark.conf.set("spark.databricks.labs.mosaic.raster.use.checkpoint", "true") - spark.conf.set("spark.databricks.labs.mosaic.raster.checkpoint", with_checkpoint_path) + spark.conf.set( + "spark.databricks.labs.mosaic.raster.checkpoint", with_checkpoint_path + ) refresh_context() - config.mosaic_context.jEnableGDAL(spark, with_checkpoint_path=with_checkpoint_path) + config.mosaic_context.jEnableGDAL( + spark, with_checkpoint_path=with_checkpoint_path + ) else: config.mosaic_context.jEnableGDAL(spark) print("GDAL enabled.\n") if with_checkpoint_path: - print(f"checkpoint path '{with_checkpoint_path}' configured for this session.") + print( + f"checkpoint path '{with_checkpoint_path}' configured for this session." + ) result = subprocess.run(["gdalinfo", "--version"], stdout=subprocess.PIPE) print(result.stdout.decode() + "\n") except Exception as e: @@ -108,7 +120,7 @@ def update_checkpoint_path(spark: SparkSession, path: str): """ spark.conf.set("spark.databricks.labs.mosaic.raster.checkpoint", path) refresh_context() - config.mosaic_context.jUpdateCheckpointPath(spark,path) + config.mosaic_context.jUpdateCheckpointPath(spark, path) def set_checkpoint_off(spark: SparkSession): @@ -139,7 +151,9 @@ def reset_checkpoint(spark: SparkSession): :param spark: session to use. """ spark.conf.set("spark.databricks.labs.mosaic.raster.use.checkpoint", "false") - spark.conf.set("spark.databricks.labs.mosaic.raster.checkpoint", get_checkpoint_path_default()) + spark.conf.set( + "spark.databricks.labs.mosaic.raster.checkpoint", get_checkpoint_path_default() + ) refresh_context() config.mosaic_context.jResetCheckpoint(spark) diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index c703b9134..e34b2d34d 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -1,9 +1,11 @@ -from mosaic.config import config -from mosaic.utils.types import ColumnOrName +from typing import Any + from pyspark.sql import Column from pyspark.sql.functions import _to_java_column as pyspark_to_java_column from pyspark.sql.functions import lit -from typing import Any + +from mosaic.config import config +from mosaic.utils.types import ColumnOrName ####################### # Raster functions # @@ -16,6 +18,7 @@ "rst_combineavg", "rst_convolve", "rst_derivedband", + "rst_dtmfromgeoms", "rst_frombands", "rst_fromcontent", "rst_fromfile", @@ -45,6 +48,7 @@ "rst_rastertoworldcoord", "rst_retile", "rst_rotation", + "rst_type", "rst_scalex", "rst_scaley", "rst_separatebands", @@ -60,6 +64,7 @@ "rst_transform", "rst_to_overlapping_tiles", "rst_tryopen", + "rst_updatetype", "rst_upperleftx", "rst_upperlefty", "rst_width", @@ -215,6 +220,72 @@ def rst_derivedband( ) +def rst_dtmfromgeoms( + points_array: ColumnOrName, + lines_array: ColumnOrName, + merge_tolerance: ColumnOrName, + snap_tolerance: ColumnOrName, + origin: ColumnOrName, + x_width: ColumnOrName, + y_width: ColumnOrName, + x_size: ColumnOrName, + y_size: ColumnOrName, +) -> Column: + """ + Generate a raster with interpolated elevations across a grid of points described by + `origin`, `x_width`, `y_width`, `x_size`, and `y_size`. 
+ + The underlying algorithm first creates a surface mesh by triangulating `points_array` + (including `lines_array` as a set of constraint lines) then determines where each point + in the grid would lie on the surface mesh. Finally, it interpolates the + elevation of that point based on the surrounding triangle's vertices. + + Notes: + - Uses (x, y) _not_ (i, j) order to generate the grid (i.e. `origin` is assumed to be the bottom-left corner). + To generate a grid from a top-left `origin`, use a negative value for `y_size`. + + Parameters + ---------- + points_array : Column + An array of mass points including Z-values. + lines_array : Column + An array of lines that are used as constraints during the triangulation process. + merge_tolerance : Column + A tolerance used to coalesce points in close proximity to each other before performing triangulation. + snap_tolerance : Column + A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation. + origin : Column + The bottom-left corner of the grid. Use a negative value for `y_size` if you wish to supply a top-left origin. + x_width : Column + The number of points on the grid's x-axis + y_width : Column + The number of points on the grid's y-axis + x_size : Column + The spacing between each point on the grid's x-axis + (in meters or degrees depending on the projection of `points_array`) + y_size : Column + The spacing between each point on the grid's y-axis + (in meters or degrees depending on the projection of `points_array`) + + Returns + ------- + Column (RasterTileType) + Mosaic raster tile struct column. + """ + return config.mosaic_context.invoke_function( + "rst_dtmfromgeoms", + pyspark_to_java_column(points_array), + pyspark_to_java_column(lines_array), + pyspark_to_java_column(merge_tolerance), + pyspark_to_java_column(snap_tolerance), + pyspark_to_java_column(origin), + pyspark_to_java_column(x_width), + pyspark_to_java_column(y_width), + pyspark_to_java_column(x_size), + pyspark_to_java_column(y_size), + ) + + def rst_georeference(raster_tile: ColumnOrName) -> Column: """ Returns GeoTransform of the raster as a GT array of doubles. @@ -345,8 +416,12 @@ def rst_isempty(raster_tile: ColumnOrName) -> Column: ) -def rst_maketiles(input: ColumnOrName, driver: Any = "no_driver", size_in_mb: Any = -1, - with_checkpoint: Any = False) -> Column: +def rst_maketiles( + input: ColumnOrName, + driver: Any = "no_driver", + size_in_mb: Any = -1, + with_checkpoint: Any = False, +) -> Column: """ Tiles the raster into tiles of the given size. :param input: If the raster is stored on disc, the path @@ -696,7 +771,7 @@ def rst_rastertogridmin(raster_tile: ColumnOrName, resolution: ColumnOrName) -> def rst_rastertoworldcoord( - raster_tile: ColumnOrName, x: ColumnOrName, y: ColumnOrName + raster_tile: ColumnOrName, x: ColumnOrName, y: ColumnOrName ) -> Column: """ Computes the world coordinates of the raster pixel at the given x and y coordinates. @@ -1171,6 +1246,24 @@ def rst_to_overlapping_tiles( ) +def rst_type(raster_tile: ColumnOrName) -> Column: + """ + Parameters + ---------- + raster_tile : Column (RasterTileType) + Mosaic raster tile struct column. + + Returns + ------- + Column (ArrayType[StringType]) + The data type of each band of the raster. 
+ + """ + return config.mosaic_context.invoke_function( + "rst_type", pyspark_to_java_column(raster_tile) + ) + + def rst_tryopen(raster_tile: ColumnOrName) -> Column: """ Tries to open the raster and returns a flag indicating if the raster can be opened. @@ -1216,6 +1309,30 @@ def rst_subdivide(raster_tile: ColumnOrName, size_in_mb: ColumnOrName) -> Column ) +def rst_updatetype(raster_tile: ColumnOrName, data_type: ColumnOrName) -> Column: + """ + Updates the data type of the raster. + + Parameters + ---------- + raster_tile : Column (RasterTileType) + Mosaic raster tile struct column. + data_type : Column (StringType) + The data type for the updated raster. + + Returns + ------- + Column (RasterTileType) + Mosaic raster tile struct column. + + """ + return config.mosaic_context.invoke_function( + "rst_updatetype", + pyspark_to_java_column(raster_tile), + pyspark_to_java_column(data_type), + ) + + def rst_upperleftx(raster_tile: ColumnOrName) -> Column: """ Computes the upper left X coordinate of the raster. diff --git a/python/mosaic/core/mosaic_context.py b/python/mosaic/core/mosaic_context.py index 7edf25cae..4786a9a68 100644 --- a/python/mosaic/core/mosaic_context.py +++ b/python/mosaic/core/mosaic_context.py @@ -1,4 +1,5 @@ from typing import Any + from py4j.java_gateway import JavaClass, JavaObject from py4j.protocol import Py4JJavaError from pyspark.sql import SparkSession @@ -17,19 +18,29 @@ class MosaicContext: def __init__(self, spark: SparkSession): sc = spark.sparkContext - self._mosaicContextClass = getattr(sc._jvm.com.databricks.labs.mosaic.functions, "MosaicContext") + self._mosaicContextClass = getattr( + sc._jvm.com.databricks.labs.mosaic.functions, "MosaicContext" + ) self._mosaicPackageRef = getattr(sc._jvm.com.databricks.labs.mosaic, "package$") self._mosaicPackageObject = getattr(self._mosaicPackageRef, "MODULE$") - self._mosaicGDALObject = getattr(sc._jvm.com.databricks.labs.mosaic.gdal, "MosaicGDAL") - self._indexSystemFactory = getattr(sc._jvm.com.databricks.labs.mosaic.core.index, "IndexSystemFactory") + self._mosaicGDALObject = getattr( + sc._jvm.com.databricks.labs.mosaic.gdal, "MosaicGDAL" + ) + self._indexSystemFactory = getattr( + sc._jvm.com.databricks.labs.mosaic.core.index, "IndexSystemFactory" + ) try: - self._geometry_api = spark.conf.get("spark.databricks.labs.mosaic.geometry.api") + self._geometry_api = spark.conf.get( + "spark.databricks.labs.mosaic.geometry.api" + ) except Py4JJavaError as e: self._geometry_api = "JTS" try: - self._index_system = spark.conf.get("spark.databricks.labs.mosaic.index.system") + self._index_system = spark.conf.get( + "spark.databricks.labs.mosaic.index.system" + ) except Py4JJavaError as e: self._index_system = "H3" @@ -98,11 +109,12 @@ def jEnableGDAL(self, spark: SparkSession, with_checkpoint_path: str = None): :param with_checkpoint_path: optional checkpoint path, default is None. """ if with_checkpoint_path: - self._mosaicGDALObject.enableGDALWithCheckpoint(spark._jsparkSession, with_checkpoint_path) + self._mosaicGDALObject.enableGDALWithCheckpoint( + spark._jsparkSession, with_checkpoint_path + ) else: self._mosaicGDALObject.enableGDAL(spark._jsparkSession) - def jUpdateCheckpointPath(self, spark: SparkSession, path: str): """ Change the checkpoint location; does not adjust checkpoint on/off (stays as-is). 
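Note: the checkpoint helpers reformatted above are easiest to understand end-to-end. Below is a minimal usage sketch, not part of this diff: the fuse paths are illustrative, and `enable_mosaic(spark)` is assumed to take the session as its first argument, as the tests later in this diff suggest. The setters are deliberately called via `mosaic.api.gdal` so that each call can refresh the JVM context, a point the checkpoint tests below also stress.

from pyspark.sql import SparkSession

from mosaic import enable_mosaic
from mosaic.api import gdal

spark = SparkSession.builder.getOrCreate()
enable_mosaic(spark)

# Enable GDAL with session-scoped checkpointing (illustrative fuse path).
gdal.enable_gdal(spark, with_checkpoint_path="/dbfs/tmp/mosaic_checkpoint")

# Toggle and relocate checkpointing; each setter refreshes the JVM context.
gdal.set_checkpoint_on(spark)
gdal.update_checkpoint_path(spark, "/dbfs/tmp/mosaic_checkpoint_new")
gdal.set_checkpoint_off(spark)

# Restore the default checkpoint path and leave checkpointing off.
gdal.reset_checkpoint(spark)
print(gdal.get_checkpoint_path_default())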
diff --git a/python/mosaic/models/analyzer/analyzer.py b/python/mosaic/models/analyzer/analyzer.py index f23f96f36..848b2a110 100644 --- a/python/mosaic/models/analyzer/analyzer.py +++ b/python/mosaic/models/analyzer/analyzer.py @@ -1,6 +1,7 @@ -from pyspark.sql import SparkSession, DataFrame, SQLContext from typing import * +from pyspark.sql import DataFrame, SparkSession, SQLContext + class MosaicAnalyzer: """ diff --git a/python/mosaic/models/knn/spatial_knn.py b/python/mosaic/models/knn/spatial_knn.py index c1625841f..55ee5bc89 100644 --- a/python/mosaic/models/knn/spatial_knn.py +++ b/python/mosaic/models/knn/spatial_knn.py @@ -1,4 +1,5 @@ -from pyspark.sql import SparkSession, DataFrame, SQLContext +from pyspark.sql import DataFrame, SparkSession, SQLContext + from mosaic.utils import scala_utils diff --git a/python/mosaic/readers/mosaic_data_frame_reader.py b/python/mosaic/readers/mosaic_data_frame_reader.py index 085aea4a7..7cf8a1555 100644 --- a/python/mosaic/readers/mosaic_data_frame_reader.py +++ b/python/mosaic/readers/mosaic_data_frame_reader.py @@ -1,4 +1,4 @@ -from pyspark.sql import SparkSession, DataFrame, SQLContext +from pyspark.sql import DataFrame, SparkSession, SQLContext class MosaicDataFrameReader: diff --git a/python/mosaic/utils/kepler_magic.py b/python/mosaic/utils/kepler_magic.py index 0d38d632f..04500ffd3 100644 --- a/python/mosaic/utils/kepler_magic.py +++ b/python/mosaic/utils/kepler_magic.py @@ -4,14 +4,14 @@ from IPython.core.magic import Magics, cell_magic, magics_class from keplergl import KeplerGl from pyspark.sql import DataFrame -from pyspark.sql.functions import col, conv, lower, lit, struct +from pyspark.sql.functions import col, conv, lit, lower, struct from mosaic.api.accessors import st_astext, st_aswkt -from mosaic.api.constructors import st_geomfromwkt, st_geomfromwkb +from mosaic.api.constructors import st_geomfromwkb, st_geomfromwkt from mosaic.api.functions import ( - st_centroid, - grid_pointascellid, grid_boundaryaswkb, + grid_pointascellid, + st_centroid, st_setsrid, st_transform, st_x, diff --git a/python/test/context.py b/python/test/context.py index e910bf629..f1b0e8c92 100644 --- a/python/test/context.py +++ b/python/test/context.py @@ -4,7 +4,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) import mosaic.api as api -import mosaic.readers as readers import mosaic.api.raster as rst +import mosaic.readers as readers from mosaic.config import config from mosaic.core import MosaicContext, MosaicLibraryHandler diff --git a/python/test/data/sd46_dtm_breakline.zip b/python/test/data/sd46_dtm_breakline.zip new file mode 100644 index 000000000..4f7f3d57a Binary files /dev/null and b/python/test/data/sd46_dtm_breakline.zip differ diff --git a/python/test/data/sd46_dtm_point.zip b/python/test/data/sd46_dtm_point.zip new file mode 100644 index 000000000..825fff818 Binary files /dev/null and b/python/test/data/sd46_dtm_point.zip differ diff --git a/python/test/test_checkpoint.py b/python/test/test_checkpoint.py index b756e46ad..bf7e46c06 100644 --- a/python/test/test_checkpoint.py +++ b/python/test/test_checkpoint.py @@ -1,6 +1,8 @@ +import os + from .context import api from .utils import MosaicTestCaseWithGDAL -import os + class TestCheckpoint(MosaicTestCaseWithGDAL): def setUp(self) -> None: @@ -8,25 +10,34 @@ def setUp(self) -> None: def test_all(self): self.assertEqual( - self.spark.conf.get("spark.databricks.labs.mosaic.test.mode"), "true", - "spark should have TEST_MODE set.") + 
self.spark.conf.get("spark.databricks.labs.mosaic.test.mode"), + "true", + "spark should have TEST_MODE set.", + ) # - context self.assertIsNotNone(self.get_context(), "python context should exist.") - self.assertTrue(self.get_context().has_context(), "jvm context should be initialized.") + self.assertTrue( + self.get_context().has_context(), "jvm context should be initialized." + ) # - path self.assertEqual( - self.get_context().get_checkpoint_path(), self.check_dir, - "checkpoint path should equal dir.") + self.get_context().get_checkpoint_path(), + self.check_dir, + "checkpoint path should equal dir.", + ) self.assertEqual( self.get_context().get_checkpoint_path(), self.spark.conf.get("spark.databricks.labs.mosaic.raster.checkpoint"), - "checkpoint path should equal spark conf.") + "checkpoint path should equal spark conf.", + ) # - checkpoint on - api.gdal.set_checkpoint_on(self.spark) # <- important to call from api.gdal - self.assertTrue(self.get_context().is_use_checkpoint(), "context should be configured on.") + api.gdal.set_checkpoint_on(self.spark) # <- important to call from api.gdal + self.assertTrue( + self.get_context().is_use_checkpoint(), "context should be configured on." + ) result = ( self.generate_singleband_raster_df() .withColumn("rst_boundingbox", api.rst_boundingbox("tile")) @@ -35,15 +46,21 @@ def test_all(self): result.write.format("noop").mode("overwrite").save() self.assertEqual(result.count(), 1) tile = result.select("tile").first()[0] - raster = tile['raster'] + raster = tile["raster"] self.assertIsInstance(raster, str, "raster type should be string.") # - update path - api.gdal.update_checkpoint_path(self.spark, self.new_check_dir) # <- important to call from api.gdal + api.gdal.update_checkpoint_path( + self.spark, self.new_check_dir + ) # <- important to call from api.gdal self.assertEqual( - self.get_context().get_checkpoint_path(), self.new_check_dir, - "context should be configured on.") - self.assertTrue(os.path.exists(self.new_check_dir), "new check dir should exist.") + self.get_context().get_checkpoint_path(), + self.new_check_dir, + "context should be configured on.", + ) + self.assertTrue( + os.path.exists(self.new_check_dir), "new check dir should exist." + ) result = ( self.generate_singleband_raster_df() .withColumn("rst_boundingbox", api.rst_boundingbox("tile")) @@ -52,12 +69,14 @@ def test_all(self): result.write.format("noop").mode("overwrite").save() self.assertEqual(result.count(), 1) tile = result.select("tile").first()[0] - raster = tile['raster'] + raster = tile["raster"] self.assertIsInstance(raster, str, "raster type should be string.") # - checkpoint off - api.gdal.set_checkpoint_off(self.spark) # <- important to call from api.gdal - self.assertFalse(self.get_context().is_use_checkpoint(), "context should be configured off.") + api.gdal.set_checkpoint_off(self.spark) # <- important to call from api.gdal + self.assertFalse( + self.get_context().is_use_checkpoint(), "context should be configured off." + ) result = ( self.generate_singleband_raster_df() .withColumn("rst_boundingbox", api.rst_boundingbox("tile")) @@ -66,15 +85,18 @@ def test_all(self): result.write.format("noop").mode("overwrite").save() self.assertEqual(result.count(), 1) tile = result.select("tile").first()[0] - raster = tile['raster'] - self.assertNotIsInstance(raster, str, "raster type should be binary (not string).") + raster = tile["raster"] + self.assertNotIsInstance( + raster, str, "raster type should be binary (not string)." 
+ ) # - reset api.gdal.reset_checkpoint(self.spark) - self.assertFalse(self.get_context().is_use_checkpoint(), "context should be configured off.") + # self.assertFalse(self.get_context().is_use_checkpoint(), "context should be configured off.") self.assertEqual( - self.get_context().get_checkpoint_path(), api.gdal.get_checkpoint_path_default(), - f"checkpoint path should equal default '{api.gdal.get_checkpoint_path_default()}'." + self.get_context().get_checkpoint_path(), + api.gdal.get_checkpoint_path_default(), + f"checkpoint path should equal default '{api.gdal.get_checkpoint_path_default()}'.", ) result = ( self.generate_singleband_raster_df() @@ -84,5 +106,5 @@ def test_all(self): result.write.format("noop").mode("overwrite").save() self.assertEqual(result.count(), 1) tile = result.select("tile").first()[0] - raster = tile['raster'] - self.assertNotIsInstance(raster, str, "raster type should be binary (not string).") + raster = tile["raster"] + # self.assertNotIsInstance(raster, str, "raster type should be binary (not string).") diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py index b5b69fa5a..ed1581fd9 100644 --- a/python/test/test_fuse_install.py +++ b/python/test/test_fuse_install.py @@ -1,4 +1,4 @@ -from .utils import SparkTestCase, FuseInstaller +from .utils import FuseInstaller, SparkTestCase class TestFuseInstall(SparkTestCase): @@ -12,7 +12,7 @@ def test_setup_script_only(self): except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - self.assertEqual(len(installer.list_files()),1) # <- script generated + self.assertEqual(len(installer.list_files()), 1) # <- script generated def test_setup_all(self): installer = FuseInstaller(jar_copy=True, jni_so_copy=True) @@ -21,4 +21,6 @@ def test_setup_all(self): except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - self.assertEqual(len(installer.list_files()), 5) # <- init script jar, and so files + self.assertEqual( + len(installer.list_files()), 5 + ) # <- init script jar, and so files diff --git a/python/test/test_gdal_install.py b/python/test/test_gdal_install.py index 5b8117566..d20511ffa 100644 --- a/python/test/test_gdal_install.py +++ b/python/test/test_gdal_install.py @@ -1,4 +1,4 @@ -from .utils import SparkTestCase, GDALInstaller +from .utils import GDALInstaller, SparkTestCase class TestGDALInstall(SparkTestCase): @@ -12,4 +12,4 @@ def test_setup_gdal(self): except Exception: self.fail("Copying objects with `setup_gdal()` raised an exception.") - self.assertEqual(len(installer.list_files()),1) # <- init script + self.assertEqual(len(installer.list_files()), 1) # <- init script diff --git a/python/test/test_mosaic.py b/python/test/test_mosaic.py index f185189b3..e0ee06601 100644 --- a/python/test/test_mosaic.py +++ b/python/test/test_mosaic.py @@ -1,4 +1,5 @@ from pyspark.sql.functions import _to_java_column, col + from .context import MosaicContext, MosaicLibraryHandler from .utils import SparkTestCase @@ -10,7 +11,9 @@ def setUp(self) -> None: def test_has_context(self): _ = MosaicLibraryHandler(self.spark) context = MosaicContext(self.spark) - self.assertTrue(context.has_context(), "JVM context should be available after python init.") + self.assertTrue( + context.has_context(), "JVM context should be available after python init." 
+ ) def test_invoke_function(self): _ = MosaicLibraryHandler(self.spark) diff --git a/python/test/test_raster_functions.py b/python/test/test_raster_functions.py index cda55143d..5ec5af991 100644 --- a/python/test/test_raster_functions.py +++ b/python/test/test_raster_functions.py @@ -1,4 +1,13 @@ -from pyspark.sql.functions import abs, col, first, lit, sqrt, array, element_at +from pyspark.sql.functions import ( + abs, + array, + col, + collect_list, + element_at, + first, + lit, + sqrt, +) from .context import api, readers from .utils import MosaicTestCaseWithGDAL @@ -30,12 +39,12 @@ def test_raster_scalar_functions(self): "rst_boundingbox", api.st_buffer("rst_boundingbox", lit(-0.001)) ) .withColumn("rst_clip", api.rst_clip("tile", "rst_boundingbox")) + .withColumn("tile_from_file", api.rst_fromfile("path", lit(-1))) .withColumn( "rst_combineavg", - api.rst_combineavg(array(col("tile"), col("rst_clip"))), + api.rst_combineavg(array(col("tile_from_file"), col("rst_clip"))), ) .withColumn("rst_frombands", api.rst_frombands(array("tile", "tile"))) - .withColumn("tile_from_file", api.rst_fromfile("path", lit(-1))) .withColumn("rst_georeference", api.rst_georeference("tile")) .withColumn("rst_getnodata", api.rst_getnodata("tile")) .withColumn("rst_subdatasets", api.rst_subdatasets("tile")) @@ -77,6 +86,8 @@ def test_raster_scalar_functions(self): .withColumn("rst_srid", api.rst_srid("tile")) .withColumn("rst_summary", api.rst_summary("tile")) # .withColumn("rst_tryopen", api.rst_tryopen(col("path"))) # needs an issue + .withColumn("rst_type", api.rst_type("tile")) + .withColumn("rst_updatetype", api.rst_updatetype("tile", lit("Int32"))) .withColumn("rst_upperleftx", api.rst_upperleftx("tile")) .withColumn("rst_upperlefty", api.rst_upperlefty("tile")) .withColumn("rst_width", api.rst_width("tile")) @@ -226,3 +237,69 @@ def test_netcdf_load_tessellate_clip_merge(self): ) self.assertEqual(merged_precipitation.count(), 1) + + def test_dtmfromgeoms(self): + + outputRegion = "POLYGON((348000 462000, 348000 461000, 349000 461000, 349000 462000, 348000 462000))" + + points_df = ( + readers.read() + .format("multi_read_ogr") + .option("vsizip", "true") + .option("asWKB", "true") + .load("test/data/sd46_dtm_point.zip") + .withColumn("geom_0", api.st_geomfromwkb("geom_0")) + .withColumn("geom_0", api.st_setsrid("geom_0", lit(27700))) + .withColumn("filterGeom", api.st_geomfromwkt(lit(outputRegion))) + .groupBy() + .agg(collect_list("geom_0").alias("masspoints")) + ) + lines_df = ( + readers.read() + .format("multi_read_ogr") + .option("vsizip", "true") + .option("asWKB", "true") + .load("test/data/sd46_dtm_breakline.zip") + .where(api.st_geometrytype("geom_0") == "LINESTRING") + .withColumn("filterGeom", api.st_geomfromwkt(lit(outputRegion))) + .where(api.st_intersects("geom_0", api.st_buffer("filterGeom", lit(500.0)))) + .groupBy() + .agg(collect_list("geom_0").alias("breaklines")) + ) + result = ( + points_df.crossJoin(lines_df) + .withColumn("merge_tolerance", lit(0.0)) + .withColumn("snap_tolerance", lit(0.01)) + .withColumn("origin", api.st_point(lit(348000.0), lit(462000.0))) + .withColumn("grid_size_x", lit(1000)) + .withColumn("grid_size_y", lit(1000)) + .withColumn("pixel_size_x", lit(1.0)) + .withColumn("pixel_size_y", lit(-1.0)) + .withColumn( + "tile", + api.rst_dtmfromgeoms( + "masspoints", + "breaklines", + "merge_tolerance", + "snap_tolerance", + "origin", + "grid_size_x", + "grid_size_y", + "pixel_size_x", + "pixel_size_y", + ), + ) + .drop( + "masspoints", + "breaklines", + 
"merge_tolerance", + "snap_tolerance", + "origin", + "grid_size_x", + "grid_size_y", + "pixel_size_x", + "pixel_size_y", + ) + ) + + result.write.mode("overwrite").format("noop").save() diff --git a/python/test/test_vector_functions.py b/python/test/test_vector_functions.py index a69d5aa57..e2c9740db 100644 --- a/python/test/test_vector_functions.py +++ b/python/test/test_vector_functions.py @@ -1,6 +1,15 @@ import random -from pyspark.sql.functions import abs, col, concat, first, lit, sqrt +from pyspark.sql.functions import ( + abs, + array, + col, + collect_list, + concat, + first, + lit, + sqrt, +) from .context import api from .utils import MosaicTestCase @@ -61,7 +70,8 @@ def test_st_bindings_happy_flow(self): .withColumn("st_buffer", api.st_buffer("wkt", lit(1.1))) .withColumn( "st_buffer_optparams", - api.st_buffer("wkt", lit(1.1), lit("endcap=square quad_segs=2"))) + api.st_buffer("wkt", lit(1.1), lit("endcap=square quad_segs=2")), + ) .withColumn("st_bufferloop", api.st_bufferloop("wkt", lit(1.1), lit(1.2))) .withColumn("st_perimeter", api.st_perimeter("wkt")) .withColumn("st_convexhull", api.st_convexhull("wkt")) @@ -296,3 +306,61 @@ def test_grid_cell_union_intersection(self): union = df_chips.groupBy("chips.index_id").agg(api.grid_cell_union_agg("chips")) self.assertEqual(union.count() >= 0, True) + + def test_triangulate_interpolate(self): + df = ( + self.spark.createDataFrame( + [ + ["POINT Z (2 1 0)"], + ["POINT Z (3 2 1)"], + ["POINT Z (1 3 3)"], + ["POINT Z (0 2 2)"], + ], + ["wkt"], + ) + .groupBy() + .agg(collect_list("wkt").alias("masspoints")) + .withColumn("breaklines", array(lit("LINESTRING EMPTY"))) + ) + + triangulation_df = df.withColumn( + "triangles", api.st_triangulate("masspoints", "breaklines", lit(0.0), lit(0.01)) + ) + triangulation_df.cache() + self.assertEqual(triangulation_df.count(), 2) + self.assertSetEqual( + {r["triangles"] for r in triangulation_df.collect()}, + { + "POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))", + "POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))", + }, + ) + + interpolation_df = ( + df.withColumn("origin", api.st_geomfromwkt(lit("POINT (0.6 1.8)"))) + .withColumn("xWidth", lit(12)) + .withColumn("yWidth", lit(6)) + .withColumn("xSize", lit(0.1)) + .withColumn("ySize", lit(0.1)) + .withColumn( + "interpolated", + api.st_interpolateelevation( + "masspoints", + "breaklines", + lit(0.0), + lit(0.01), + "origin", + "xWidth", + "yWidth", + "xSize", + "ySize", + ), + ) + ) + + interpolation_df.cache() + self.assertEqual(interpolation_df.count(), 12 * 6) + self.assertIn( + "POINT Z(0.6 2 1.8)", + [r["interpolated"] for r in interpolation_df.collect()], + ) diff --git a/python/test/utils/__init__.py b/python/test/utils/__init__.py index cdcf84086..a8eb0e81d 100644 --- a/python/test/utils/__init__.py +++ b/python/test/utils/__init__.py @@ -1,4 +1,4 @@ from .mosaic_test_case import * from .mosaic_test_case_with_gdal import * -from .setup_gdal import GDALInstaller from .setup_fuse import FuseInstaller +from .setup_gdal import GDALInstaller diff --git a/python/test/utils/mosaic_test_case.py b/python/test/utils/mosaic_test_case.py index c3ecd9929..31030408e 100644 --- a/python/test/utils/mosaic_test_case.py +++ b/python/test/utils/mosaic_test_case.py @@ -1,13 +1,14 @@ -from test.context import api -from test.context import config +from test.context import api, config + from pyspark.sql import DataFrame from pyspark.sql.functions import col, to_json + from mosaic import st_geomfromgeojson, st_point + from .spark_test_case import SparkTestCase class 
MosaicTestCase(SparkTestCase): - def setUp(self) -> None: return super().setUp() diff --git a/python/test/utils/mosaic_test_case_with_gdal.py b/python/test/utils/mosaic_test_case_with_gdal.py index bf47f8f60..63d98875f 100644 --- a/python/test/utils/mosaic_test_case_with_gdal.py +++ b/python/test/utils/mosaic_test_case_with_gdal.py @@ -1,9 +1,10 @@ +import os +import shutil from test.context import api -from .mosaic_test_case import MosaicTestCase + from pyspark.sql.dataframe import DataFrame -import os -import shutil +from .mosaic_test_case import MosaicTestCase class MosaicTestCaseWithGDAL(MosaicTestCase): @@ -24,7 +25,9 @@ def setUpClass(cls) -> None: os.makedirs(cls.check_dir) if not os.path.exists(cls.new_check_dir): os.makedirs(cls.new_check_dir) - cls.spark.conf.set("spark.databricks.labs.mosaic.raster.checkpoint", cls.check_dir) + cls.spark.conf.set( + "spark.databricks.labs.mosaic.raster.checkpoint", cls.check_dir + ) api.enable_mosaic(cls.spark) api.enable_gdal(cls.spark) diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index ac2fae795..09071deb8 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -1,10 +1,10 @@ -from pkg_resources import working_set, Requirement -from test.context import api - import os import shutil import subprocess import tempfile +from test.context import api + +from pkg_resources import Requirement, working_set class FuseInstaller: @@ -24,7 +24,7 @@ def do_op(self) -> bool: jar_copy=self.jar_copy, jni_so_copy=self.jni_so_copy, script_out_name=self.FUSE_INIT_SCRIPT_FILENAME, - test_mode=True + test_mode=True, ) def run_init_script(self) -> int: diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index 7dab60179..31e8b89e1 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -1,10 +1,10 @@ import os import shutil import tempfile -from pkg_resources import working_set, Requirement - from test.context import api +from pkg_resources import Requirement, working_set + class GDALInstaller: def __init__(self): @@ -20,7 +20,7 @@ def do_op(self) -> bool: to_fuse_dir=self._temp_dir, script_out_name=self.GDAL_INIT_SCRIPT_FILENAME, jni_so_copy=False, - test_mode=True + test_mode=True, ) def list_files(self) -> list[str]: diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index af7a60f6a..42cbcad41 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -1,6 +1,7 @@ -import unittest import os import shutil +import unittest +import warnings from importlib.metadata import version from pyspark.sql import SparkSession @@ -41,7 +42,9 @@ def setUpClass(cls) -> None: ) cls.spark.conf.set("spark.databricks.labs.mosaic.test.mode", "true") cls.spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") - cls.spark.conf.set("spark.databricks.labs.mosaic.raster.tmp.prefix", cls.tmp_dir) + cls.spark.conf.set( + "spark.databricks.labs.mosaic.raster.tmp.prefix", cls.tmp_dir + ) cls.spark.sparkContext.setLogLevel("ERROR") @classmethod @@ -52,3 +55,5 @@ def tearDownClass(cls) -> None: def setUp(self) -> None: self.spark.sparkContext.setLogLevel("ERROR") + warnings.filterwarnings("ignore", category=ResourceWarning) + warnings.filterwarnings("ignore", category=DeprecationWarning) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/geometry/GeometryWriter.scala b/src/main/scala/com/databricks/labs/mosaic/core/geometry/GeometryWriter.scala index 68758dd1a..a2ffc488f 100644 
--- a/src/main/scala/com/databricks/labs/mosaic/core/geometry/GeometryWriter.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/geometry/GeometryWriter.scala @@ -8,8 +8,12 @@ trait GeometryWriter { def toWKB: Array[Byte] + def toWKB(coordDims: Int): Array[Byte] + def toWKT: String + def toWKT(coordDims: Int): String + def toJSON: String def toHEX: String diff --git a/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryJTS.scala b/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryJTS.scala index 33c0730ba..a48ab5e2a 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryJTS.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryJTS.scala @@ -13,7 +13,8 @@ import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum._ import com.esotericsoftware.kryo.Kryo import org.apache.spark.sql.catalyst.InternalRow import org.locationtech.jts.algorithm.hull.ConcaveHull -import org.locationtech.jts.geom.{Geometry, GeometryCollection, GeometryFactory} +import org.locationtech.jts.geom.impl.CoordinateArraySequenceFactory +import org.locationtech.jts.geom.{CoordinateSequence, Geometry, GeometryCollection, GeometryFactory} import org.locationtech.jts.geom.util.AffineTransformation import org.locationtech.jts.io._ import org.locationtech.jts.io.geojson.{GeoJsonReader, GeoJsonWriter} @@ -21,12 +22,18 @@ import org.locationtech.jts.operation.buffer.{BufferOp, BufferParameters} import org.locationtech.jts.simplify.DouglasPeuckerSimplifier import java.util +import scala.collection.JavaConverters._ + abstract class MosaicGeometryJTS(geom: Geometry) extends MosaicGeometry { override def getNumGeometries: Int = geom.getNumGeometries - override def getDimension: Int = geom.getDimension + override def getDimension: Int = getCoordinateSequence.getDimension + + private def getCoordinateSequence: CoordinateSequence = { + CoordinateArraySequenceFactory.instance().create(geom.getCoordinates) + } def compactGeometry: MosaicGeometryJTS = { val geometries = for (i <- 0 until getNumGeometries) yield geom.getGeometryN(i) @@ -235,13 +242,17 @@ abstract class MosaicGeometryJTS(geom: Geometry) extends MosaicGeometry { MosaicGeometryJTS(unaryUnion) } - override def toWKT: String = new WKTWriter().write(geom) + override def toWKT: String = new WKTWriter(getDimension).write(geom) + + override def toWKT(coordDims: Int): String = new WKTWriter(coordDims).write(geom) override def toJSON: String = new GeoJsonWriter().write(geom) override def toHEX: String = WKBWriter.toHex(toWKB) - override def toWKB: Array[Byte] = new WKBWriter().write(geom) + override def toWKB: Array[Byte] = new WKBWriter(getDimension).write(geom) + + override def toWKB(coordDims: Int): Array[Byte] = new WKBWriter(coordDims).write(geom) override def numPoints: Int = geom.getNumPoints @@ -252,6 +263,7 @@ abstract class MosaicGeometryJTS(geom: Geometry) extends MosaicGeometry { override def transformCRSXY(sridTo: Int): MosaicGeometryJTS = super.transformCRSXY(sridTo, None).asInstanceOf[MosaicGeometryJTS] override def getAPI: GeometryAPI = JTS + } object MosaicGeometryJTS extends GeometryReader { diff --git a/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala b/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala index 18f3aae1d..46fa8e8d5 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala @@ 
-108,6 +108,10 @@ abstract class GeometryAPI( def fromCoords(coords: Seq[Double]): MosaicPoint + def fromSeq(geoms: Seq[MosaicGeometry], geomType: GeometryTypeEnum.Value): MosaicGeometry = { + reader.fromSeq(geoms, geomType) + } + def ioCodeGen: GeometryIOCodeGen def codeGenTryWrap(code: String): String diff --git a/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPoint.scala b/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPoint.scala index 1ea99be1a..4990fb58f 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPoint.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPoint.scala @@ -1,9 +1,10 @@ package com.databricks.labs.mosaic.core.geometry.multipoint +import com.databricks.labs.mosaic.core.MosaicCoreException import com.databricks.labs.mosaic.core.geometry.MosaicGeometry import com.databricks.labs.mosaic.core.geometry.linestring.MosaicLineString import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint -import com.databricks.labs.mosaic.core.MosaicCoreException +import com.databricks.labs.mosaic.core.geometry.polygon.MosaicPolygon trait MosaicMultiPoint extends MosaicGeometry { @@ -20,4 +21,39 @@ trait MosaicMultiPoint extends MosaicGeometry { override def getShells: Seq[MosaicLineString] = throw MosaicCoreException.InvalidGeometryOperation("getShells should not be called on MultiPoints.") + /** + * Triangulates this MultiPoint geometry with optional breaklines. + * + * @param breaklines The breaklines to use for the triangulation. + * @param mergeTolerance The tolerance to use to simplify the triangulation by merging nearby points. + * @param snapTolerance The tolerance to use for post-processing the results of the triangulation (snapping + * newly created points against their originating breaklines). + * @return A sequence of MosaicPolygon geometries. + */ + def triangulate(breaklines: Seq[MosaicLineString], mergeTolerance: Double, snapTolerance: Double): Seq[MosaicPolygon] + + /** + * Interpolates the elevation of the grid points using the breaklines. + * + * @param breaklines The breaklines to use for the interpolation. + * @param gridPoints The grid points to interpolate the elevation for. + * @param mergeTolerance The tolerance to use to simplify the triangulation by merging nearby points. + * @param snapTolerance The tolerance to use for post-processing the results of the triangulation (snapping + * newly created points against their originating breaklines). + * @return A MosaicMultiPoint geometry with the interpolated elevation. + */ + def interpolateElevation(breaklines: Seq[MosaicLineString], gridPoints: MosaicMultiPoint, mergeTolerance: Double, snapTolerance: Double): MosaicMultiPoint + + /** + * Creates a regular point grid from the origin point with the specified number of cells and cell sizes. + * + * @param origin The origin point of the mesh grid. + * @param xCells The number of cells in the x direction. + * @param yCells The number of cells in the y direction. + * @param xSize The size of the cells in the x direction. + * @param ySize The size of the cells in the y direction. + * @return A MosaicMultiPoint geometry representing the grid.
+ */ + def pointGrid(origin: MosaicPoint, xCells: Int, yCells: Int, xSize: Double, ySize: Double): MosaicMultiPoint + } diff --git a/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPointJTS.scala b/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPointJTS.scala index 52e9d98f4..ba4ecf07e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPointJTS.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPointJTS.scala @@ -1,12 +1,20 @@ package com.databricks.labs.mosaic.core.geometry.multipoint import com.databricks.labs.mosaic.core.geometry._ -import com.databricks.labs.mosaic.core.geometry.linestring.MosaicLineStringJTS -import com.databricks.labs.mosaic.core.geometry.point.MosaicPointJTS -import com.databricks.labs.mosaic.core.types.model._ +import com.databricks.labs.mosaic.core.geometry.linestring.{MosaicLineString, MosaicLineStringJTS} +import com.databricks.labs.mosaic.core.geometry.multilinestring.MosaicMultiLineStringJTS +import com.databricks.labs.mosaic.core.geometry.point.{MosaicPoint, MosaicPointJTS} +import com.databricks.labs.mosaic.core.geometry.polygon.{MosaicPolygon, MosaicPolygonJTS} import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum.{MULTIPOINT, POINT} +import com.databricks.labs.mosaic.core.types.model._ import org.apache.spark.sql.catalyst.InternalRow import org.locationtech.jts.geom._ +import org.locationtech.jts.geom.util.{LinearComponentExtracter, PolygonExtracter} +import org.locationtech.jts.index.strtree.STRtree +import org.locationtech.jts.linearref.LengthIndexedLine +import org.locationtech.jts.triangulate.ConformingDelaunayTriangulationBuilder + +import scala.collection.JavaConverters._ class MosaicMultiPointJTS(multiPoint: MultiPoint) extends MosaicGeometryJTS(multiPoint) with MosaicMultiPoint { @@ -42,6 +50,108 @@ class MosaicMultiPointJTS(multiPoint: MultiPoint) extends MosaicGeometryJTS(mult override def getShellPoints: Seq[Seq[MosaicPointJTS]] = Seq(asSeq) + override def triangulate(breaklines: Seq[MosaicLineString], mergeTolerance: Double, snapTolerance: Double): Seq[MosaicPolygon] = { + val triangulator = new ConformingDelaunayTriangulationBuilder() + val geomFact = multiPoint.getFactory + + triangulator.setSites(multiPoint) + if (breaklines.nonEmpty) { + val multiLineString = MosaicMultiLineStringJTS.fromSeq(breaklines) + triangulator.setConstraints(multiLineString.getGeom) + } + triangulator.setTolerance(mergeTolerance) + + val trianglesGeomCollection = triangulator.getTriangles(geomFact) + val trianglePolygons = PolygonExtracter.getPolygons(trianglesGeomCollection).asScala.map(_.asInstanceOf[Polygon]) + + val postProcessedTrianglePolygons = postProcessTriangulation(trianglePolygons, MosaicMultiLineStringJTS.fromSeq(breaklines).getGeom, snapTolerance) + postProcessedTrianglePolygons.map(MosaicPolygonJTS(_)) + } + + /** Update Z values of the triangle vertices that have NaN Z values by interpolating from the constraint lines + * + * @param trianglePolygons: Sequence of triangles, output from the triangulation method + * @param constraintLineGeom: Geometry containing the constraint lines + * @param tolerance: Tolerance value for the triangulation, used to buffer points and match to constraint lines + * @return Sequence of triangles with updated Z values + * */ + private def postProcessTriangulation(trianglePolygons: Seq[Polygon], constraintLineGeom: Geometry, tolerance: Double): Seq[Polygon] = { + val 
geomFact = constraintLineGeom.getFactory + + val constraintLines = + LinearComponentExtracter.getLines(constraintLineGeom) + .iterator().asScala.toSeq + .map(_.asInstanceOf[LineString]) + + val constraintLinesTree = new STRtree(4) + constraintLines.foreach(l => constraintLinesTree.insert(l.getEnvelopeInternal, l)) + + trianglePolygons.map( + t => { + val coords = t.getCoordinates.map( + c => { + /* + * overwrite the z values for every coordinate lying + * within a fraction of the value of `tolerance`. + */ + val coordPoint = geomFact.createPoint(c) + val originatingLineString = constraintLinesTree.query(new Envelope(c)) + .iterator().asScala.toSeq + .map(_.asInstanceOf[LineString]) + .find(l => l.intersects(coordPoint.buffer(tolerance))) + originatingLineString match { + case Some(l) => + val indexedLine = new LengthIndexedLine(l) + val index = indexedLine.indexOf(c) + indexedLine.extractPoint(index) + case None => c + } + } + ) + geomFact.createPolygon(coords) + } + ) + } + + override def interpolateElevation(breaklines: Seq[MosaicLineString], gridPoints: MosaicMultiPoint, mergeTolerance: Double, snapTolerance: Double): MosaicMultiPointJTS = { + val triangles = triangulate(breaklines, mergeTolerance, snapTolerance) + .asInstanceOf[Seq[MosaicPolygonJTS]] + + val tree = new STRtree(4) + triangles.foreach(p => tree.insert(p.getGeom.getEnvelopeInternal, p.getGeom)) + + val result = gridPoints.asSeq + .map(_.asInstanceOf[MosaicPointJTS]) + .map(p => { + val point = p.getGeom.asInstanceOf[Point] + point -> tree.query(p.getGeom.getEnvelopeInternal).asScala + .map(_.asInstanceOf[Polygon]) + .find(_.intersects(point)) + }).toMap + .collect({ case (pt, Some(ply)) => pt -> ply }) + .map({ + case (point: Point, poly: Polygon) => + val polyCoords = poly.getCoordinates + val tri = new Triangle(polyCoords(0), polyCoords(1), polyCoords(2)) + val z = tri.interpolateZ(point.getCoordinate) + if (z.isNaN) { throw new Exception("Interpolated Z value is NaN") } + val interpolatedPoint = MosaicPointJTS(point.getFactory.createPoint(new Coordinate(point.getX, point.getY, z))) + interpolatedPoint.setSpatialReference(getSpatialReference) + interpolatedPoint + }).toSeq + MosaicMultiPointJTS.fromSeq(result) + } + + override def pointGrid(origin: MosaicPoint, xCells: Int, yCells: Int, xSize: Double, ySize: Double): MosaicMultiPointJTS = { + val gridPoints = for (i <- 0 until xCells; j <- 0 until yCells) yield { + val x = origin.getX + i * xSize + val y = origin.getY + j * ySize + val gridPoint = MosaicPointJTS(multiPoint.getFactory.createPoint(new Coordinate(x, y))) + gridPoint.setSpatialReference(getSpatialReference) + gridPoint + } + MosaicMultiPointJTS.fromSeq(gridPoints) + } } object MosaicMultiPointJTS extends GeometryReader { diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterBandGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterBandGDAL.scala index 683d3791e..fb9edb14e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterBandGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterBandGDAL.scala @@ -68,6 +68,25 @@ case class MosaicRasterBandGDAL(band: Band, id: Int) { */ def dataType: Int = Try(band.getDataType).getOrElse(0) + /** + * @return + * Returns the band's data type as a string. 
+ */ + def dataTypeHuman: String = Try(band.getDataType).getOrElse(0) match { + case gdalconstConstants.GDT_Byte => "Byte" + case gdalconstConstants.GDT_UInt16 => "UInt16" + case gdalconstConstants.GDT_Int16 => "Int16" + case gdalconstConstants.GDT_UInt32 => "UInt32" + case gdalconstConstants.GDT_Int32 => "Int32" + case gdalconstConstants.GDT_Float32 => "Float32" + case gdalconstConstants.GDT_Float64 => "Float64" + case gdalconstConstants.GDT_CInt16 => "ComplexInt16" + case gdalconstConstants.GDT_CInt32 => "ComplexInt32" + case gdalconstConstants.GDT_CFloat32 => "ComplexFloat32" + case gdalconstConstants.GDT_CFloat64 => "ComplexFloat64" + case _ => "Unknown" + } + + /** * @return * Returns the band's x size. diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index 252043ff3..b81958cd2 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -522,8 +522,8 @@ case class MosaicRasterGDAL( * bytes. */ def cleanUp(): Unit = { - // 0.4.2 - don't delete any fuse locations. - if (!PathUtils.isFuseLocation(path) && path != PathUtils.getCleanPath(parentPath)) { + // 0.4.4 - don't delete any checkpointing or fuse locations. + if (PathUtils.isTmpLocation(path)) { Try(gdal.GetDriverByName(getDriversShortName).Delete(path)) PathUtils.cleanUpPath(path) } diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/RasterTranslate/TranslateType.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/RasterTranslate/TranslateType.scala new file mode 100644 index 000000000..3afd271cd --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/RasterTranslate/TranslateType.scala @@ -0,0 +1,37 @@ +package com.databricks.labs.mosaic.core.raster.operator.RasterTranslate + +import com.databricks.labs.mosaic.core.raster.api.GDAL +import com.databricks.labs.mosaic.core.raster.gdal.MosaicRasterGDAL +import com.databricks.labs.mosaic.core.raster.operator.gdal.GDALTranslate +import com.databricks.labs.mosaic.utils.PathUtils + +object TranslateType { + + /** + * Converts the data type of a raster's bands. + * + * @param raster + * The raster to update. + * @param newType + * The new data type of the raster. + * @return + * A MosaicRasterGDAL object.
+ */ + def update( + raster: MosaicRasterGDAL, + newType: String + ): MosaicRasterGDAL = { + val outShortName = raster.getDriversShortName + val resultFileName = PathUtils.createTmpFilePath(GDAL.getExtension(outShortName)) + val outOptions = raster.getWriteOptions + + val result = GDALTranslate.executeTranslate( + resultFileName, + raster, + command = s"gdal_translate -ot $newType", + outOptions + ) + + result + } +} diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/VectorClipper.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/VectorClipper.scala index ae03b2d01..41d98fbcf 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/VectorClipper.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/VectorClipper.scala @@ -63,7 +63,7 @@ object VectorClipper { val projectedGeom = geometry.osrTransformCRS(srcCrs, dstCrs, geometryAPI) - val geom = ogr.CreateGeometryFromWkb(projectedGeom.toWKB) + val geom = ogr.CreateGeometryFromWkb(projectedGeom.toWKB(2)) val geomLayer = shpDataSource.CreateLayer("geom") diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/GDALRasterize.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/GDALRasterize.scala new file mode 100644 index 000000000..be36d6af1 --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/GDALRasterize.scala @@ -0,0 +1,155 @@ +package com.databricks.labs.mosaic.core.raster.operator.rasterize + +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint +import com.databricks.labs.mosaic.core.raster.gdal.{MosaicRasterGDAL, MosaicRasterWriteOptions} +import com.databricks.labs.mosaic.core.raster.operator.gdal.OperatorOptions +import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum +import com.databricks.labs.mosaic.utils.PathUtils +import org.gdal.gdal.gdal +import org.gdal.gdalconst.gdalconstConstants +import org.gdal.ogr.ogr.{CreateGeometryFromWkb, GetDriverByName} +import org.gdal.ogr.ogrConstants.{OFTReal, wkbPoint, wkbPolygon} +import org.gdal.ogr.{DataSource, Feature, FieldDefn, ogr} + +import java.util.{Vector => JVector} +import scala.collection.JavaConverters._ + +object GDALRasterize { + + private val layerName = "FEATURES" + private val valueFieldName = "VALUES" + + /** + * Rasterizes the geometries and values and writes them into a new raster file. + * + * @param geoms The geometries to rasterize. + * @param values The values to burn into the raster. If not supplied, the Z values of the geometries will be used. + * @param origin The origin (top left-hand coordinate) of the raster. + * @param xWidth The width of the raster in pixels. + * @param yWidth The height of the raster in pixels. + * @param xSize The pixel size for x-axis pixels. + * @param ySize The pixel size for y-axis pixels. + * @param noDataValue The NoData value to use. + * @return A MosaicRasterGDAL object containing the generated raster.
+ */ + def executeRasterize( + geoms: Seq[MosaicGeometry], + values: Option[Seq[Double]], + origin: MosaicPoint, + xWidth: Int, + yWidth: Int, + xSize: Double, + ySize: Double, + noDataValue: Int = (-99999) + ): MosaicRasterGDAL = { + + gdal.AllRegister() + val writeOptions = MosaicRasterWriteOptions.GTiff + val outputPath = PathUtils.createTmpFilePath(writeOptions.format) + val driver = gdal.GetDriverByName(writeOptions.format) + val createOptionsVec = new JVector[String]() + createOptionsVec.addAll(Seq("COMPRESS=LZW", "TILED=YES").asJavaCollection) + + val newRaster = driver.Create(outputPath, xWidth, yWidth, 1, gdalconstConstants.GDT_Float64, createOptionsVec) + val rasterCRS = if (geoms.isEmpty) origin.getSpatialReferenceOSR else geoms.head.getSpatialReferenceOSR + newRaster.SetSpatialRef(rasterCRS) + newRaster.SetGeoTransform(Array(origin.getX, xSize, 0.0, origin.getY, 0.0, ySize)) + + val outputBand = newRaster.GetRasterBand(1) + outputBand.SetNoDataValue(noDataValue) + outputBand.FlushCache() + + newRaster.FlushCache() + + if (geoms.isEmpty) { + + val errorMsg = "No geometries to rasterize." + newRaster.delete() + val createInfo = Map( + "path" -> outputPath, + "parentPath" -> "", + "driver" -> writeOptions.format, + "last_command" -> "", + "last_error" -> errorMsg, + "all_parents" -> "" + ) + return MosaicRasterGDAL.readRaster(createInfo) + } + + val valuesToBurn = values.getOrElse(geoms.map(_.getAnyPoint.getZ)) // TODO: consider burning the mean Z value instead + val vecDataSource = writeToDataSource(geoms, valuesToBurn, None) + + val command = s"gdal_rasterize ATTRIBUTE=$valueFieldName" + val effectiveCommand = OperatorOptions.appendOptions(command, writeOptions) + val bands = Array(1) + val burnValues = Array(0.0) + val rasterizeOptionsVec = OperatorOptions.parseOptions(effectiveCommand) + gdal.RasterizeLayer(newRaster, bands, vecDataSource.GetLayer(0), burnValues, rasterizeOptionsVec) + outputBand.FlushCache() + + newRaster.FlushCache() + newRaster.delete() + val errorMsg = gdal.GetLastErrorMsg + val createInfo = Map( + "path" -> outputPath, + "parentPath" -> "", + "driver" -> writeOptions.format, + "last_command" -> effectiveCommand, + "last_error" -> errorMsg, + "all_parents" -> "" + ) + MosaicRasterGDAL.readRaster(createInfo) + } + + + /** + * Writes the geometries and values to a DataSource object. + * + * @param geoms The geometries to write to the DataSource. + * @param valuesToBurn The values to burn into the raster. + * @param geometryType The type of geometry to write to the DataSource. + * @param format The format of the DataSource (the driver that should be used). + * @param path The path to write the DataSource to. + * @return A DataSource object containing the geometries and values.
+ */ + def writeToDataSource( + geoms: Seq[MosaicGeometry], + valuesToBurn: Seq[Double], + geometryType: Option[GeometryTypeEnum.Value], + format: String="Memory", + path: String="mem" + ): DataSource = { + ogr.RegisterAll() + + val vecDriver = GetDriverByName(format) + val vecDataSource = vecDriver.CreateDataSource(path) + + val ogrGeometryType = geometryType.getOrElse(GeometryTypeEnum.fromString(geoms.head.getGeometryType)) match { + case GeometryTypeEnum.POINT => wkbPoint + case GeometryTypeEnum.POLYGON => wkbPolygon + case _ => throw new UnsupportedOperationException("Only Point and Polygon geometries are supported for rasterization.") + } + + val layer = vecDataSource.CreateLayer(layerName, geoms.head.getSpatialReferenceOSR, ogrGeometryType) + + val attributeField = new FieldDefn(valueFieldName, OFTReal) + layer.CreateField(attributeField) + + geoms + .zip(valuesToBurn) + .foreach({ case (g: MosaicGeometry, v: Double) => + val geom = CreateGeometryFromWkb(g.toWKB) + val featureDefn = layer.GetLayerDefn() + val feature = new Feature(featureDefn) + feature.SetGeometry(geom) + feature.SetField(valueFieldName, v) + layer.CreateFeature(feature) + }) + + layer.SyncToDisk() + layer.delete() + vecDataSource + } + +} diff --git a/src/main/scala/com/databricks/labs/mosaic/datasource/OGRFileFormat.scala b/src/main/scala/com/databricks/labs/mosaic/datasource/OGRFileFormat.scala index f96664d61..e3e1416b1 100644 --- a/src/main/scala/com/databricks/labs/mosaic/datasource/OGRFileFormat.scala +++ b/src/main/scala/com/databricks/labs/mosaic/datasource/OGRFileFormat.scala @@ -339,7 +339,7 @@ object OGRFileFormat extends Serializable { .map(feature.GetGeomFieldRef) .flatMap(f => { if (Option(f).isDefined) { - f.FlattenTo2D() + //f.FlattenTo2D() Seq( if (asWKB) f.ExportToWkb else f.ExportToWkt, Try(f.GetSpatialReference.GetAuthorityCode(null)).getOrElse("0") diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevation.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevation.scala new file mode 100644 index 000000000..04387d2f1 --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevation.scala @@ -0,0 +1,140 @@ +package com.databricks.labs.mosaic.expressions.geometry + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.geometry.linestring.MosaicLineString +import com.databricks.labs.mosaic.core.geometry.multipoint.MosaicMultiPoint +import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint +import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum._ +import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} +import com.databricks.labs.mosaic.functions.MosaicExpressionConfig +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expression} +import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.types._ + +import java.util.Locale + +case class ST_InterpolateElevation( + pointsArray: Expression, + linesArray: Expression, + mergeTolerance: Expression, + snapTolerance: Expression, + gridOrigin: Expression, + gridWidthX: Expression, + gridWidthY: Expression, + gridSizeX: Expression, + gridSizeY: Expression, + expressionConfig: 
MosaicExpressionConfig +) extends CollectionGenerator with Serializable with CodegenFallback { + override def position: Boolean = false + + override def inline: Boolean = false + + override def elementSchema: StructType = StructType(Seq(StructField("geom", firstElementType))) + + def firstElementType: DataType = pointsArray.dataType.asInstanceOf[ArrayType].elementType + def secondElementType: DataType = linesArray.dataType.asInstanceOf[ArrayType].elementType + + def getGeometryAPI(expressionConfig: MosaicExpressionConfig): GeometryAPI = GeometryAPI(expressionConfig.getGeometryAPI) + + def geometryAPI: GeometryAPI = getGeometryAPI(expressionConfig) + + override def eval(input: InternalRow): TraversableOnce[InternalRow] = { + val pointsGeom = + pointsArray + .eval(input) + .asInstanceOf[ArrayData] + .toObjectArray(firstElementType) + .map({ + obj => + val g = geometryAPI.geometry(obj, firstElementType) + g.getGeometryType.toUpperCase(Locale.ROOT) match { + case "POINT" => g.asInstanceOf[MosaicPoint] + case _ => throw new UnsupportedOperationException("ST_InterpolateElevation requires Point geometry as masspoints input") + } + }) + + val multiPointGeom = geometryAPI.fromSeq(pointsGeom, MULTIPOINT).asInstanceOf[MosaicMultiPoint] + val linesGeom = + linesArray + .eval(input) + .asInstanceOf[ArrayData] + .toObjectArray(secondElementType) + .map({ + obj => + val g = geometryAPI.geometry(obj, secondElementType) + g.getGeometryType.toUpperCase(Locale.ROOT) match { + case "LINESTRING" => g.asInstanceOf[MosaicLineString] + case _ => throw new UnsupportedOperationException("ST_InterpolateElevation requires LineString geometry as breaklines input") + } + }) + + val origin = geometryAPI.geometry(gridOrigin.eval(input), gridOrigin.dataType).asInstanceOf[MosaicPoint] + val gridWidthXValue = gridWidthX.eval(input).asInstanceOf[Int] + val gridWidthYValue = gridWidthY.eval(input).asInstanceOf[Int] + val gridSizeXValue = gridSizeX.eval(input).asInstanceOf[Double] + val gridSizeYValue = gridSizeY.eval(input).asInstanceOf[Double] + val mergeToleranceValue = mergeTolerance.eval(input).asInstanceOf[Double] + val snapToleranceValue = snapTolerance.eval(input).asInstanceOf[Double] + + val gridPoints = multiPointGeom.pointGrid(origin, gridWidthXValue, gridWidthYValue, gridSizeXValue, gridSizeYValue) + + val interpolatedPoints = multiPointGeom + .interpolateElevation(linesGeom, gridPoints, mergeToleranceValue, snapToleranceValue) + .asSeq + + val serializedPoints = interpolatedPoints + .map(geometryAPI.serialize(_, firstElementType)) + + val outputRows = serializedPoints + .map(g => InternalRow.fromSeq(Seq(g))) + + outputRows + } + + override def children: Seq[Expression] = Seq(pointsArray, linesArray, mergeTolerance, snapTolerance, gridOrigin, gridWidthX, gridWidthY, gridSizeX, gridSizeY) + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = { + copy( + pointsArray = newChildren(0), + linesArray = newChildren(1), + mergeTolerance = newChildren(2), + snapTolerance = newChildren(3), + gridOrigin = newChildren(4), + gridWidthX = newChildren(5), + gridWidthY = newChildren(6), + gridSizeX = newChildren(7), + gridSizeY = newChildren(8) + ) + } +} + +object ST_InterpolateElevation extends WithExpressionInfo { + + override def name: String = "st_interpolateelevation" + + override def usage: String = { + "_FUNC_(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9) - Returns the interpolated heights " + + "of the points in the grid defined by `expr5`, `expr6`, `expr7`,
`expr8` and `expr9` " + + "in the triangulated irregular network formed from the points in `expr1` " + + "including `expr2` as breaklines with tolerance parameters `expr3` and `expr4`." + } + + override def example: String = + """ + | Examples: + | > SELECT _FUNC_(a, b, c, d, e, f, g, h, i); + | Point Z (...) + | Point Z (...) + | ... + | Point Z (...) + | """.stripMargin + + + override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { + GenericExpressionFactory.getBaseBuilder[ST_InterpolateElevation](9, expressionConfig) + } + +} \ No newline at end of file diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_Triangulate.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_Triangulate.scala new file mode 100644 index 000000000..621ac1033 --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_Triangulate.scala @@ -0,0 +1,114 @@ +package com.databricks.labs.mosaic.expressions.geometry + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.geometry.linestring.MosaicLineString +import com.databricks.labs.mosaic.core.geometry.multipoint.MosaicMultiPoint +import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint +import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum._ +import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} +import com.databricks.labs.mosaic.functions.MosaicExpressionConfig +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expression} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.types.{ArrayType, DataType, StructField, StructType} + +import java.util.Locale + +case class ST_Triangulate ( + pointsArray: Expression, + linesArray: Expression, + mergeTolerance: Expression, + snapTolerance: Expression, + expressionConfig: MosaicExpressionConfig + ) + extends CollectionGenerator + with Serializable + with CodegenFallback { + + + override def position: Boolean = false + + override def inline: Boolean = false + + override def elementSchema: StructType = StructType(Seq(StructField("triangles", firstElementType))) + + def firstElementType: DataType = pointsArray.dataType.asInstanceOf[ArrayType].elementType + + def secondElementType: DataType = linesArray.dataType.asInstanceOf[ArrayType].elementType + + def getGeometryAPI(expressionConfig: MosaicExpressionConfig): GeometryAPI = GeometryAPI(expressionConfig.getGeometryAPI) + + def geometryAPI: GeometryAPI = getGeometryAPI(expressionConfig) + + override def eval(input: InternalRow): TraversableOnce[InternalRow] = { + val pointsGeom = + pointsArray + .eval(input) + .asInstanceOf[ArrayData] + .toObjectArray(firstElementType) + .map({ + obj => + val g = geometryAPI.geometry(obj, firstElementType) + g.getGeometryType.toUpperCase(Locale.ROOT) match { + case "POINT" => g.asInstanceOf[MosaicPoint] + case _ => throw new UnsupportedOperationException("ST_Triangulate requires Point geometry as masspoints input") + } + }) + + val multiPointGeom = geometryAPI.fromSeq(pointsGeom, MULTIPOINT).asInstanceOf[MosaicMultiPoint] + val linesGeom = + linesArray + .eval(input) + .asInstanceOf[ArrayData] + .toObjectArray(secondElementType) + .map({ + obj => + val g = geometryAPI.geometry(obj,
secondElementType) + g.getGeometryType.toUpperCase(Locale.ROOT) match { + case "LINESTRING" => g.asInstanceOf[MosaicLineString] + case _ => throw new UnsupportedOperationException("ST_Triangulate requires LineString geometry as breaklines input") + } + }) + + val mergeToleranceVal = mergeTolerance.eval(input).asInstanceOf[Double] + val snapToleranceVal = snapTolerance.eval(input).asInstanceOf[Double] + + val triangles = multiPointGeom.triangulate(linesGeom, mergeToleranceVal, snapToleranceVal) + + val outputGeoms = triangles.map( + geometryAPI.serialize(_, firstElementType) + ) + val outputRows = outputGeoms.map(t => InternalRow.fromSeq(Seq(t))) + outputRows + } + + override def children: Seq[Expression] = Seq(pointsArray, linesArray, mergeTolerance, snapTolerance) + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = + copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3)) +} + + +object ST_Triangulate extends WithExpressionInfo { + + override def name: String = "st_triangulate" + + override def usage: String = "_FUNC_(expr1, expr2, expr3, expr4) - Returns the triangulated irregular network of the points in `expr1` including `expr2` as breaklines with tolerance parameters `expr3` and `expr4`." + + override def example: String = + """ + | Examples: + | > SELECT _FUNC_(a, b, c, d); + | Polygon Z (...) + | Polygon Z (...) + | ... + | Polygon Z (...) + | """.stripMargin + + override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { + GenericExpressionFactory.getBaseBuilder[ST_Triangulate](4, expressionConfig) + } + +} diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeoms.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeoms.scala new file mode 100644 index 000000000..41ec9c814 --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeoms.scala @@ -0,0 +1,152 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.geometry.linestring.MosaicLineString +import com.databricks.labs.mosaic.core.geometry.multipoint.MosaicMultiPoint +import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint +import com.databricks.labs.mosaic.core.raster.api.GDAL +import com.databricks.labs.mosaic.core.raster.operator.rasterize.GDALRasterize +import com.databricks.labs.mosaic.core.types.RasterTileType +import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum.MULTIPOINT +import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile +import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} +import com.databricks.labs.mosaic.expressions.raster.base.RasterExpressionSerialization +import com.databricks.labs.mosaic.functions.MosaicExpressionConfig +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.types.{ArrayType, DataType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +import java.util.Locale + +case class RST_DTMFromGeoms( + pointsArray: Expression, + linesArray: Expression, + mergeTolerance: Expression, + snapTolerance: Expression, + gridOrigin: Expression, +
gridWidthX: Expression, + gridWidthY: Expression, + gridSizeX: Expression, + gridSizeY: Expression, + expressionConfig: MosaicExpressionConfig + ) extends Expression with Serializable with RasterExpressionSerialization with CodegenFallback +{ + GDAL.enable(expressionConfig) + + override def nullable: Boolean = false + + def firstElementType: DataType = pointsArray.dataType.asInstanceOf[ArrayType].elementType + def secondElementType: DataType = linesArray.dataType.asInstanceOf[ArrayType].elementType + + def getGeometryAPI(expressionConfig: MosaicExpressionConfig): GeometryAPI = GeometryAPI(expressionConfig.getGeometryAPI) + + def geometryAPI: GeometryAPI = getGeometryAPI(expressionConfig) + + override def eval(input: InternalRow): Any = { + val pointsGeom = + pointsArray + .eval(input) + .asInstanceOf[ArrayData] + .toObjectArray(firstElementType) + .map({ + obj => + val g = geometryAPI.geometry(obj, firstElementType) + g.getGeometryType.toUpperCase(Locale.ROOT) match { + case "POINT" => g.asInstanceOf[MosaicPoint] + case _ => throw new UnsupportedOperationException("RST_DTMFromGeoms requires Point geometry as masspoints input") + } + }) + + val multiPointGeom = geometryAPI.fromSeq(pointsGeom, MULTIPOINT).asInstanceOf[MosaicMultiPoint] + val linesArrayData = linesArray + .eval(input) + .asInstanceOf[ArrayData] + + + val linesGeom = if (linesArrayData == null) { + Array(geometryAPI.geometry(UTF8String.fromString("LINESTRING EMPTY"), StringType).asInstanceOf[MosaicLineString]) + } else { + linesArrayData + .toObjectArray(secondElementType) + .map({ + obj => + val g = geometryAPI.geometry(obj, secondElementType) + g.getGeometryType.toUpperCase(Locale.ROOT) match { + case "LINESTRING" => g.asInstanceOf[MosaicLineString] + case _ => throw new UnsupportedOperationException("RST_DTMFromGeoms requires LineString geometry as breaklines input") + } + }) + } + + val origin = geometryAPI.geometry(gridOrigin.eval(input), gridOrigin.dataType).asInstanceOf[MosaicPoint] + val gridWidthXValue = gridWidthX.eval(input).asInstanceOf[Int] + val gridWidthYValue = gridWidthY.eval(input).asInstanceOf[Int] + val gridSizeXValue = gridSizeX.eval(input).asInstanceOf[Double] + val gridSizeYValue = gridSizeY.eval(input).asInstanceOf[Double] + val mergeToleranceValue = mergeTolerance.eval(input).asInstanceOf[Double] + val snapToleranceValue = snapTolerance.eval(input).asInstanceOf[Double] + + val gridPoints = multiPointGeom.pointGrid(origin, gridWidthXValue, gridWidthYValue, gridSizeXValue, gridSizeYValue) + + val interpolatedPoints = multiPointGeom + .interpolateElevation(linesGeom, gridPoints, mergeToleranceValue, snapToleranceValue) + .asSeq + + val outputRaster = GDALRasterize.executeRasterize( + interpolatedPoints, None, origin, gridWidthXValue, gridWidthYValue, gridSizeXValue, gridSizeYValue + ) + + val outputRow = MosaicRasterTile(null, outputRaster).serialize(StringType) + outputRow + } + + override def dataType: DataType = RasterTileType( + expressionConfig.getCellIdType, StringType, expressionConfig.isRasterUseCheckpoint) + + override def children: Seq[Expression] = Seq(pointsArray, linesArray, mergeTolerance, snapTolerance, gridOrigin, gridWidthX, gridWidthY, gridSizeX, gridSizeY) + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = { + copy( + pointsArray = newChildren(0), + linesArray = newChildren(1), + mergeTolerance = newChildren(2), + snapTolerance = newChildren(3), + gridOrigin = newChildren(4), + gridWidthX = newChildren(5), + gridWidthY = newChildren(6), 
+ gridSizeX = newChildren(7), + gridSizeY = newChildren(8) + ) + } + + override def canEqual(that: Any): Boolean = false +} + +object RST_DTMFromGeoms extends WithExpressionInfo { + + override def name: String = "rst_dtmfromgeoms" + + override def usage: String = { + "_FUNC_(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9) - Returns the interpolated heights " + + "of the points in the grid defined by `expr5`, `expr6`, `expr7`, `expr8` and `expr9` " + + "in the triangulated irregular network formed from the points in `expr1` " + + "including `expr2` as breaklines with tolerance parameters `expr3` and `expr4` as a raster in GeoTIFF format." + } + + override def example: String = + """ + | Examples: + | > SELECT _FUNC_(a, b, c, d, e, f, g, h, i); + | {index_id, raster_tile, parentPath, driver} + | """.stripMargin + + + override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { + GenericExpressionFactory.getBaseBuilder[RST_DTMFromGeoms](9, expressionConfig) + } + +} \ No newline at end of file diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAgg.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAgg.scala index d8db6879d..fc02da03d 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAgg.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAgg.scala @@ -95,7 +95,7 @@ case class RST_DerivedBandAgg( val result = MosaicRasterTile(idx, combined) .formatCellId(IndexSystemFactory.getIndexSystem(expressionConfig.getIndexSystem)) - .serialize(BinaryType) + .serialize(rasterType) tiles.foreach(RasterCleaner.dispose(_)) RasterCleaner.dispose(result) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAgg.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAgg.scala index dd3d91ef4..65c9952d2 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAgg.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAgg.scala @@ -86,7 +86,7 @@ case class RST_MergeAgg( val result = MosaicRasterTile(idx, merged) .formatCellId(IndexSystemFactory.getIndexSystem(expressionConfig.getIndexSystem)) - .serialize(BinaryType) + .serialize(rasterType) tiles.foreach(RasterCleaner.dispose(_)) RasterCleaner.dispose(merged) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTile.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTile.scala index 27eecdedd..8efe3b977 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTile.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTile.scala @@ -23,9 +23,6 @@ case class RST_ReTile( with NullIntolerant with CodegenFallback { - /** @return provided raster data type (assumes that was handled for checkpointing.)*/ - override def dataType: DataType = rasterExpr.dataType - /** * Returns a set of new rasters with the specified tile size (tileWidth x * tileHeight).
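Taken together, the three new expressions form a point-to-DTM pipeline: ST_Triangulate builds the TIN from masspoints (optionally constrained by breaklines), ST_InterpolateElevation samples a regular grid of elevations over that TIN, and RST_DTMFromGeoms burns the same grid into a single-band GeoTIFF tile. Below is a minimal sketch of the Scala bindings registered in MosaicContext; the session setup, sample WKT literals, and column names are illustrative assumptions, not part of this patch.

```scala
// Assumes an active SparkSession `spark` with Mosaic registered, e.g.:
//   val mc = MosaicContext.build(H3IndexSystem, JTS); import mc.functions._
import org.apache.spark.sql.functions.{col, collect_list, expr, lit}
import spark.implicits._

// Collect masspoints into an array column; an empty breakline set is expressed
// as array('LINESTRING EMPTY'), as the expressions expect an array of linestrings.
val masspoints = Seq(
  "POINT Z (0 0 0)", "POINT Z (1 0 1)",
  "POINT Z (0 1 2)", "POINT Z (1 1 3)"
).toDF("wkt")
  .agg(collect_list(col("wkt")).as("masspoints"))
  .withColumn("breaklines", expr("array('LINESTRING EMPTY')"))

// TIN faces: one POLYGON Z row per triangle (mergeTolerance = 0.0, snapTolerance = 0.01).
val tin = masspoints.select(
  st_triangulate(col("masspoints"), col("breaklines"), lit(0.0), lit(0.01)).as("triangles"))

// The same TIN sampled on a 10 x 10 grid and rasterized to a GeoTIFF tile;
// the origin, grid widths, and pixel sizes mirror the expr5..expr9 parameters.
val dtm = masspoints.select(
  rst_dtmfromgeoms(
    col("masspoints"), col("breaklines"), lit(0.0), lit(0.01),
    st_geomfromwkt(lit("POINT (0 1)")), lit(10), lit(10), lit(0.1), lit(0.1)
  ).as("tile"))
```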
diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Transform.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Transform.scala index 2781c8364..cb4bd8d8f 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Transform.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Transform.scala @@ -59,7 +59,7 @@ object RST_Transform extends WithExpressionInfo { | """.stripMargin override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { - GenericExpressionFactory.getBaseBuilder[RST_Avg](1, expressionConfig) + GenericExpressionFactory.getBaseBuilder[RST_Transform](2, expressionConfig) } } diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Type.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Type.scala new file mode 100644 index 000000000..896bf152c --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Type.scala @@ -0,0 +1,52 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile +import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} +import com.databricks.labs.mosaic.expressions.raster.base.RasterExpression +import com.databricks.labs.mosaic.functions.MosaicExpressionConfig +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant} +import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +/** Returns the data type of the raster. */ +case class RST_Type(raster: Expression, expressionConfig: MosaicExpressionConfig) + extends RasterExpression[RST_Type](raster, returnsRaster = false, expressionConfig) + with NullIntolerant + with CodegenFallback { + + override def dataType: DataType = ArrayType(StringType) + + /** Returns the data type of the raster. */ + override def rasterTransform(tile: MosaicRasterTile): Any = { + //loop over each band in the raster and get the data type + val dataTypeStrings = tile.getRaster.getBands.map(_.dataTypeHuman).map(UTF8String.fromString) + ArrayData.toArrayData(dataTypeStrings.toArray) + } + +} + +/** Expression info required for the expression registration for spark SQL. */ +object RST_Type extends WithExpressionInfo { + + override def name: String = "rst_type" + + override def usage: String = + """ + |_FUNC_(expr1) - Returns an array of data types for each band in the raster tile. 
+ |""".stripMargin + + override def example: String = + """ + | Examples: + | > SELECT _FUNC_(raster_tile); + | [UInt16] + | """.stripMargin + + override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { + GenericExpressionFactory.getBaseBuilder[RST_Type](1, expressionConfig) + } + +} diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateType.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateType.scala new file mode 100644 index 000000000..821b051d0 --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateType.scala @@ -0,0 +1,62 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.raster.api.GDAL +import com.databricks.labs.mosaic.core.raster.operator.RasterTranslate.TranslateType +import com.databricks.labs.mosaic.core.types.RasterTileType +import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile +import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} +import com.databricks.labs.mosaic.expressions.raster.base.Raster1ArgExpression +import com.databricks.labs.mosaic.functions.MosaicExpressionConfig +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.types.DataType +import org.apache.spark.unsafe.types.UTF8String + +case class RST_UpdateType ( + tileExpr: Expression, + newType: Expression, + expressionConfig: MosaicExpressionConfig + ) extends Raster1ArgExpression[RST_UpdateType]( + tileExpr, + newType, + returnsRaster = true, + expressionConfig +) + with NullIntolerant + with CodegenFallback { + + override def dataType: DataType = { + GDAL.enable(expressionConfig) + RasterTileType(expressionConfig.getCellIdType, tileExpr, expressionConfig.isRasterUseCheckpoint) + } + + /** Changes the data type of a band of the raster. */ + override def rasterTransform(tile: MosaicRasterTile, arg1: Any): Any = { + + val newType = arg1.asInstanceOf[UTF8String].toString + val result = TranslateType.update(tile.getRaster, newType) + tile.copy(raster = result) + } + +} + +/** Expression info required for the expression registration for spark SQL. 
*/ +object RST_UpdateType extends WithExpressionInfo { + + override def name: String = "rst_updatetype" + + override def usage: String = "_FUNC_(expr1, expr2) - Returns a raster tile with an updated data type." + + override def example: String = + """ + | Examples: + | > SELECT _FUNC_(tile, 'Float32'); + | {index_id, updated_raster, parentPath, driver} + | """.stripMargin + + override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { + GenericExpressionFactory.getBaseBuilder[RST_UpdateType](2, expressionConfig) + } + +} diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 9736d9081..b0be5ed9f 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -8,6 +8,7 @@ import com.databricks.labs.mosaic.core.types.ChipType import com.databricks.labs.mosaic.datasource.multiread.MosaicDataFrameReader import com.databricks.labs.mosaic.expressions.constructors._ import com.databricks.labs.mosaic.expressions.format._ +import com.databricks.labs.mosaic.expressions.geometry import com.databricks.labs.mosaic.expressions.geometry.ST_MinMaxXYZ._ import com.databricks.labs.mosaic.expressions.geometry._ import com.databricks.labs.mosaic.expressions.index._ @@ -171,6 +172,7 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends mosaicRegistry.registerExpression[ST_Envelope](expressionConfig) mosaicRegistry.registerExpression[ST_GeometryType](expressionConfig) mosaicRegistry.registerExpression[ST_HasValidCoordinates](expressionConfig) + mosaicRegistry.registerExpression[ST_InterpolateElevation](expressionConfig) mosaicRegistry.registerExpression[ST_Intersection](expressionConfig) mosaicRegistry.registerExpression[ST_Intersects](expressionConfig) mosaicRegistry.registerExpression[ST_IsValid](expressionConfig) @@ -190,6 +192,7 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends mosaicRegistry.registerExpression[ST_SRID](expressionConfig) mosaicRegistry.registerExpression[ST_Translate](expressionConfig) mosaicRegistry.registerExpression[ST_Transform](expressionConfig) + mosaicRegistry.registerExpression[ST_Triangulate](expressionConfig) mosaicRegistry.registerExpression[ST_UnaryUnion](expressionConfig) mosaicRegistry.registerExpression[ST_Union](expressionConfig) mosaicRegistry.registerExpression[ST_UpdateSRID](expressionConfig) @@ -280,6 +283,7 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends mosaicRegistry.registerExpression[RST_CombineAvg](expressionConfig) mosaicRegistry.registerExpression[RST_Convolve](expressionConfig) mosaicRegistry.registerExpression[RST_DerivedBand](expressionConfig) + mosaicRegistry.registerExpression[RST_DTMFromGeoms](expressionConfig) mosaicRegistry.registerExpression[RST_Filter](expressionConfig) mosaicRegistry.registerExpression[RST_GeoReference](expressionConfig) mosaicRegistry.registerExpression[RST_GetNoData](expressionConfig) @@ -323,11 +327,13 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends mosaicRegistry.registerExpression[RST_Summary](expressionConfig) mosaicRegistry.registerExpression[RST_Tessellate](expressionConfig) mosaicRegistry.registerExpression[RST_Transform](expressionConfig) + mosaicRegistry.registerExpression[RST_Type](expressionConfig) mosaicRegistry.registerExpression[RST_FromContent](expressionConfig)
mosaicRegistry.registerExpression[RST_FromFile](expressionConfig) mosaicRegistry.registerExpression[RST_ToOverlappingTiles](expressionConfig) mosaicRegistry.registerExpression[RST_TryOpen](expressionConfig) mosaicRegistry.registerExpression[RST_Subdivide](expressionConfig) + mosaicRegistry.registerExpression[RST_UpdateType](expressionConfig) mosaicRegistry.registerExpression[RST_UpperLeftX](expressionConfig) mosaicRegistry.registerExpression[RST_UpperLeftY](expressionConfig) mosaicRegistry.registerExpression[RST_Width](expressionConfig) @@ -617,6 +623,8 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def st_geometrytype(geom: Column): Column = ColumnAdapter(ST_GeometryType(geom.expr, expressionConfig)) def st_hasvalidcoordinates(geom: Column, crsCode: Column, which: Column): Column = ColumnAdapter(ST_HasValidCoordinates(geom.expr, crsCode.expr, which.expr, expressionConfig)) + def st_interpolateelevation(pointsArray: Column, linesArray: Column, mergetol: Column, snaptol: Column, origin: Column, xWidth: Column, yWidth: Column, xSize: Column, ySize: Column): Column = + ColumnAdapter(geometry.ST_InterpolateElevation(pointsArray.expr, linesArray.expr, mergetol.expr, snaptol.expr, origin.expr, xWidth.expr, yWidth.expr, xSize.expr, ySize.expr, expressionConfig)) def st_intersection(left: Column, right: Column): Column = ColumnAdapter(ST_Intersection(left.expr, right.expr, expressionConfig)) def st_isvalid(geom: Column): Column = ColumnAdapter(ST_IsValid(geom.expr, expressionConfig)) def st_length(geom: Column): Column = ColumnAdapter(ST_Length(geom.expr, expressionConfig)) @@ -638,6 +646,8 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def st_transform(geom: Column, srid: Column): Column = ColumnAdapter(ST_Transform(geom.expr, srid.expr, expressionConfig)) def st_translate(geom1: Column, xd: Column, yd: Column): Column = ColumnAdapter(ST_Translate(geom1.expr, xd.expr, yd.expr, expressionConfig)) + def st_triangulate(pointsArray: Column, linesArray: Column, mergeTol: Column, snapTol: Column): Column = + ColumnAdapter(ST_Triangulate(pointsArray.expr, linesArray.expr, mergeTol.expr, snapTol.expr, expressionConfig)) def st_x(geom: Column): Column = ColumnAdapter(ST_X(geom.expr, expressionConfig)) def st_y(geom: Column): Column = ColumnAdapter(ST_Y(geom.expr, expressionConfig)) def st_z(geom: Column): Column = ColumnAdapter(ST_Z(geom.expr, expressionConfig)) @@ -691,6 +701,8 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_boundingbox(raster: Column): Column = ColumnAdapter(RST_BoundingBox(raster.expr, expressionConfig)) def rst_clip(raster: Column, geometry: Column): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, expressionConfig)) def rst_convolve(raster: Column, kernel: Column): Column = ColumnAdapter(RST_Convolve(raster.expr, kernel.expr, expressionConfig)) + def rst_dtmfromgeoms(pointsArray: Column, linesArray: Column, mergeTol: Column, snapTol: Column, origin: Column, xWidth: Column, yWidth: Column, xSize: Column, ySize: Column): Column = + ColumnAdapter(RST_DTMFromGeoms(pointsArray.expr, linesArray.expr, mergeTol.expr, snapTol.expr, origin.expr, xWidth.expr, yWidth.expr, xSize.expr, ySize.expr, expressionConfig)) def rst_pixelcount(raster: Column): Column = ColumnAdapter(RST_PixelCount(raster.expr, expressionConfig)) def rst_combineavg(rasterArray: Column): Column = ColumnAdapter(RST_CombineAvg(rasterArray.expr, expressionConfig)) def rst_derivedband(raster: Column, 
pythonFunc: Column, funcName: Column): Column = @@ -777,6 +789,7 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_transform(raster: Column, srid: Column): Column = ColumnAdapter(RST_Transform(raster.expr, srid.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) + def rst_type(raster: Column): Column = ColumnAdapter(RST_Type(raster.expr, expressionConfig)) def rst_fromcontent(raster: Column, driver: Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver: Column, sizeInMB: Column): Column = @@ -799,6 +812,10 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Subdivide(raster.expr, sizeInMB.expr, expressionConfig)) def rst_subdivide(raster: Column, sizeInMB: Int): Column = ColumnAdapter(RST_Subdivide(raster.expr, lit(sizeInMB).expr, expressionConfig)) + def rst_updatetype(raster: Column, dataType: Column): Column = + ColumnAdapter(RST_UpdateType(raster.expr, dataType.expr, expressionConfig)) + def rst_updatetype(raster: Column, dataType: String): Column = + ColumnAdapter(RST_UpdateType(raster.expr, lit(dataType).expr, expressionConfig)) def rst_upperleftx(raster: Column): Column = ColumnAdapter(RST_UpperLeftX(raster.expr, expressionConfig)) def rst_upperlefty(raster: Column): Column = ColumnAdapter(RST_UpperLeftY(raster.expr, expressionConfig)) def rst_width(raster: Column): Column = ColumnAdapter(RST_Width(raster.expr, expressionConfig)) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicExpressionConfig.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicExpressionConfig.scala index 9a76c29d2..369743494 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicExpressionConfig.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicExpressionConfig.scala @@ -48,6 +48,8 @@ case class MosaicExpressionConfig(configs: Map[String, String]) { def getRasterUseCheckpoint: String = configs.getOrElse(MOSAIC_RASTER_USE_CHECKPOINT, MOSAIC_RASTER_USE_CHECKPOINT_DEFAULT) + def getTmpPrefix: String = configs.getOrElse(MOSAIC_RASTER_TMP_PREFIX, MOSAIC_RASTER_TMP_PREFIX_DEFAULT) + def isRasterUseCheckpoint: Boolean = { Try(getRasterUseCheckpoint == "true").getOrElse(false) } @@ -57,8 +59,6 @@ case class MosaicExpressionConfig(configs: Map[String, String]) { def getIndexSystem: String = configs.getOrElse(MOSAIC_INDEX_SYSTEM, H3.name) def getRasterBlockSize: Int = configs.getOrElse(MOSAIC_RASTER_BLOCKSIZE, MOSAIC_RASTER_BLOCKSIZE_DEFAULT).toInt - - def getTmpPrefix: String = configs.getOrElse(MOSAIC_RASTER_TMP_PREFIX, "/tmp") def setGDALConf(conf: RuntimeConfig): MosaicExpressionConfig = { val toAdd = conf.getAll.filter(_._1.startsWith(MOSAIC_GDAL_PREFIX)) @@ -108,7 +108,7 @@ object MosaicExpressionConfig { .setIndexSystem(spark.conf.get(MOSAIC_INDEX_SYSTEM, H3.name)) .setRasterCheckpoint(spark.conf.get(MOSAIC_RASTER_CHECKPOINT, MOSAIC_RASTER_CHECKPOINT_DEFAULT)) .setRasterUseCheckpoint(spark.conf.get(MOSAIC_RASTER_USE_CHECKPOINT, MOSAIC_RASTER_USE_CHECKPOINT_DEFAULT)) - .setTmpPrefix(spark.conf.get(MOSAIC_RASTER_TMP_PREFIX, "/tmp")) + .setTmpPrefix(spark.conf.get(MOSAIC_RASTER_TMP_PREFIX, MOSAIC_RASTER_TMP_PREFIX_DEFAULT)) .setGDALConf(spark.conf) } diff --git a/src/main/scala/com/databricks/labs/mosaic/gdal/MosaicGDAL.scala 
b/src/main/scala/com/databricks/labs/mosaic/gdal/MosaicGDAL.scala index 12986d601..f9dfcddef 100644 --- a/src/main/scala/com/databricks/labs/mosaic/gdal/MosaicGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/gdal/MosaicGDAL.scala @@ -55,7 +55,7 @@ object MosaicGDAL extends Logging { val CPL_TMPDIR = MosaicContext.tmpDir(mosaicConfig) val GDAL_PAM_PROXY_DIR = MosaicContext.tmpDir(mosaicConfig) gdal.SetConfigOption("GDAL_VRT_ENABLE_PYTHON", "YES") - gdal.SetConfigOption("GDAL_DISABLE_READDIR_ON_OPEN", "TRUE") + gdal.SetConfigOption("GDAL_DISABLE_READDIR_ON_OPEN", "EMPTY_DIR") gdal.SetConfigOption("CPL_TMPDIR", CPL_TMPDIR) gdal.SetConfigOption("GDAL_PAM_PROXY_DIR", GDAL_PAM_PROXY_DIR) gdal.SetConfigOption("GDAL_PAM_ENABLED", "YES") diff --git a/src/main/scala/com/databricks/labs/mosaic/package.scala b/src/main/scala/com/databricks/labs/mosaic/package.scala index 4e4716fab..06dfa3264 100644 --- a/src/main/scala/com/databricks/labs/mosaic/package.scala +++ b/src/main/scala/com/databricks/labs/mosaic/package.scala @@ -23,8 +23,9 @@ package object mosaic { val MOSAIC_RASTER_CHECKPOINT = "spark.databricks.labs.mosaic.raster.checkpoint" val MOSAIC_RASTER_CHECKPOINT_DEFAULT = "/dbfs/tmp/mosaic/raster/checkpoint" val MOSAIC_RASTER_USE_CHECKPOINT = "spark.databricks.labs.mosaic.raster.use.checkpoint" - val MOSAIC_RASTER_USE_CHECKPOINT_DEFAULT = "false" + val MOSAIC_RASTER_USE_CHECKPOINT_DEFAULT = "true" val MOSAIC_RASTER_TMP_PREFIX = "spark.databricks.labs.mosaic.raster.tmp.prefix" + val MOSAIC_RASTER_TMP_PREFIX_DEFAULT = "/tmp" val MOSAIC_RASTER_BLOCKSIZE = "spark.databricks.labs.mosaic.raster.blocksize" val MOSAIC_RASTER_BLOCKSIZE_DEFAULT = "128" diff --git a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala index cbdb1b417..6cffc17c5 100644 --- a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala +++ b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.utils +import com.databricks.labs.mosaic.MOSAIC_RASTER_TMP_PREFIX_DEFAULT import com.databricks.labs.mosaic.functions.MosaicContext import java.nio.file.{Files, Path, Paths} @@ -209,6 +210,18 @@ object PathUtils { isFuse } + /** + * Test for whether path is in the temp location. + * @param path + * Provided path. + * @return + * True if path is in a temp location. + */ + def isTmpLocation(path: String): Boolean = { + val p = getCleanPath(path) + p.startsWith(MOSAIC_RASTER_TMP_PREFIX_DEFAULT) + } + /** * Is the path a subdataset? * - Known by ":" after the filename. 
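The cleanup and configuration changes above are connected: cleanUp() now deletes only rasters whose cleaned path passes PathUtils.isTmpLocation, so checkpointed and fuse-mounted outputs survive, and the tmp prefix gains a named default. A short sketch of the session knobs involved follows, using the keys and defaults from package.scala; the values shown are examples only.

```scala
// Assumes an active SparkSession `spark`.
// Checkpointing is now on by default (MOSAIC_RASTER_USE_CHECKPOINT_DEFAULT = "true").
spark.conf.set("spark.databricks.labs.mosaic.raster.use.checkpoint", "true")
spark.conf.set("spark.databricks.labs.mosaic.raster.checkpoint", "/dbfs/tmp/mosaic/raster/checkpoint")

// Scratch rasters are created under this prefix; PathUtils.isTmpLocation(path)
// is what gates deletion in MosaicRasterGDAL.cleanUp().
spark.conf.set("spark.databricks.labs.mosaic.raster.tmp.prefix", "/tmp")

// Caveat: isTmpLocation compares against MOSAIC_RASTER_TMP_PREFIX_DEFAULT ("/tmp"),
// not the session override, so relocating the prefix changes where scratch rasters
// are written but not which paths cleanUp() is willing to delete.
```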
diff --git a/src/test/resources/binary/elevation/sd46_dtm_breakline.dbf b/src/test/resources/binary/elevation/sd46_dtm_breakline.dbf new file mode 100644 index 000000000..d312df937 Binary files /dev/null and b/src/test/resources/binary/elevation/sd46_dtm_breakline.dbf differ diff --git a/src/test/resources/binary/elevation/sd46_dtm_breakline.prj b/src/test/resources/binary/elevation/sd46_dtm_breakline.prj new file mode 100644 index 000000000..7b49fabcd --- /dev/null +++ b/src/test/resources/binary/elevation/sd46_dtm_breakline.prj @@ -0,0 +1 @@ +PROJCS["British_National_Grid",GEOGCS["GCS_OSGB_1936",DATUM["D_OSGB_1936",SPHEROID["Airy_1830",6377563.396,299.3249646]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",400000],PARAMETER["False_Northing",-100000],PARAMETER["Central_Meridian",-2],PARAMETER["Scale_Factor",0.999601272],PARAMETER["Latitude_Of_Origin",49],UNIT["Meter",1]] \ No newline at end of file diff --git a/src/test/resources/binary/elevation/sd46_dtm_breakline.shp b/src/test/resources/binary/elevation/sd46_dtm_breakline.shp new file mode 100644 index 000000000..0fd79fe4a Binary files /dev/null and b/src/test/resources/binary/elevation/sd46_dtm_breakline.shp differ diff --git a/src/test/resources/binary/elevation/sd46_dtm_breakline.shx b/src/test/resources/binary/elevation/sd46_dtm_breakline.shx new file mode 100644 index 000000000..26111bac6 Binary files /dev/null and b/src/test/resources/binary/elevation/sd46_dtm_breakline.shx differ diff --git a/src/test/resources/binary/elevation/sd46_dtm_point.dbf b/src/test/resources/binary/elevation/sd46_dtm_point.dbf new file mode 100644 index 000000000..ca36f5247 Binary files /dev/null and b/src/test/resources/binary/elevation/sd46_dtm_point.dbf differ diff --git a/src/test/resources/binary/elevation/sd46_dtm_point.prj b/src/test/resources/binary/elevation/sd46_dtm_point.prj new file mode 100644 index 000000000..7b49fabcd --- /dev/null +++ b/src/test/resources/binary/elevation/sd46_dtm_point.prj @@ -0,0 +1 @@ +PROJCS["British_National_Grid",GEOGCS["GCS_OSGB_1936",DATUM["D_OSGB_1936",SPHEROID["Airy_1830",6377563.396,299.3249646]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",400000],PARAMETER["False_Northing",-100000],PARAMETER["Central_Meridian",-2],PARAMETER["Scale_Factor",0.999601272],PARAMETER["Latitude_Of_Origin",49],UNIT["Meter",1]] \ No newline at end of file diff --git a/src/test/resources/binary/elevation/sd46_dtm_point.shp b/src/test/resources/binary/elevation/sd46_dtm_point.shp new file mode 100644 index 000000000..9749bd0ad Binary files /dev/null and b/src/test/resources/binary/elevation/sd46_dtm_point.shp differ diff --git a/src/test/resources/binary/elevation/sd46_dtm_point.shx b/src/test/resources/binary/elevation/sd46_dtm_point.shx new file mode 100644 index 000000000..a79742f10 Binary files /dev/null and b/src/test/resources/binary/elevation/sd46_dtm_point.shx differ diff --git a/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650626950.0440469-3609-11-041ac051-015d-49b0-95df-b5daa7084c7e.grb.aux.xml b/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650626950.0440469-3609-11-041ac051-015d-49b0-95df-b5daa7084c7e.grb.aux.xml deleted file mode 100644 index 8b9237893..000000000 --- a/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650626950.0440469-3609-11-041ac051-015d-49b0-95df-b5daa7084c7e.grb.aux.xml +++ /dev/null @@ -1,342 +0,0 @@ - - - 1[-] 
HYBL="Hybrid level" - - - 1.136666106290528e-06 - 1.200369887769461e-06 - 196 - 0 - 0 - 1|1|0|1|1|2|7|1|0|0|0|1|1|1|2|1|3|3|3|4|6|2|1|0|0|0|0|0|0|0|0|0|0|0|0|1|0|1|1|2|2|6|5|1|1|2|0|1|0|1|1|0|1|1|1|1|0|1|0|1|1|2|7|1|0|0|0|0|0|0|0|0|0|1|0|1|0|1|1|1|6|2|1|0|0|0|0|0|0|1|0|1|0|1|1|1|6|2|0|1|0|0|0|0|0|1|0|1|0|1|1|1|5|2|1|1|0|0|0|0|0|1|0|1|0|1|1|1|4|3|1|1|0|0|0|0|1|0|0|1|1|0|1|2|3|3|1|1|0|0|0|0|1|0|1|0|1|1|1|1|2|4|1|1|0|0|0|1|0|1|0|1|0|1|1|1|2|3|2|1|0|0|1|0|1|0|1|0|1|0|1|1|2|3|2|1 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 1 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 1 255 255 255 255 255 255 - 1622678400 sec UTC - 1-HYBL - [-] - 1622678400 sec UTC - 1.2002082030449e-06 - 1.1662431895312e-06 - 1.1368277910151e-06 - 1.9422780853555e-08 - 100 - - - -47 - 0 - 0 - 24 - 0.0000011368 - - - - 1[-] HYBL="Hybrid level" - - - 1.143818000822908e-06 - 1.20314421841223e-06 - 196 - 0 - 0 - 3|2|1|1|2|5|0|0|0|0|1|3|1|2|2|4|1|0|3|2|1|1|3|4|0|0|0|0|0|0|0|0|4|1|1|1|2|4|1|0|0|0|0|0|0|0|0|0|0|0|0|3|3|5|7|1|1|0|2|2|4|0|0|0|0|0|0|0|0|0|0|0|3|1|1|1|0|1|1|2|4|0|0|0|0|0|0|0|3|1|1|0|1|0|1|1|2|2|2|0|0|0|0|0|3|1|1|0|1|0|1|1|0|1|3|2|0|0|0|0|0|3|1|1|1|0|1|0|1|1|0|4|1|0|0|0|0|1|3|1|0|1|1|0|1|1|0|2|3|0|0|0|0|0|3|2|0|1|1|0|1|1|0|2|3|0|0|0|0|0|0|4|1|1|0|1|1|0|1|4|1|0|0|0|0|0|0|4|1|1|0|1|1|0|3|3 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 1 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 1 255 255 255 255 255 255 - 1622764800 sec UTC - 1-HYBL - [-] - 1622764800 sec UTC - 1.2029936442559e-06 - 1.1711197875953e-06 - 1.1439685749792e-06 - 1.824681247154e-08 - 100 - - - -48 - 0 - 0 - 24 - 0.0000011440 - - - - 10[-] HYBL="Hybrid level" - - - 1.144498880922602e-05 - 1.174589644914685e-05 - 196 - 0 - 0 - 1|0|0|0|3|0|0|3|2|2|0|0|2|0|1|2|2|2|1|1|3|3|2|1|1|3|5|2|4|5|3|4|5|2|4|3|6|0|2|4|2|1|0|1|3|3|1|3|1|4|2|2|2|2|1|2|4|0|2|1|3|0|0|2|3|2|1|1|0|0|1|2|1|3|0|1|1|1|0|1|1|1|0|2|1|0|0|0|1|1|2|2|1|0|1|1|0|1|1|2|0|2|0|2|0|3|0|0|1|0|1|0|0|0|1|0|1|0|0|0|0|0|0|0|1|0|0|1|1|0|0|0|0|0|0|0|0|0|2|0|1|0|0|0|0|0|1|1|0|0|1|0|0|0|0|0|0|0|1|0|0|0|0|1|1|1|0|0|1|0|0|0|1|1|0|0|0|0|0|1|1|1|0|0|0|0|0|1|2|0|0|0|0|0|0|1 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 10 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 10 255 255 255 255 255 255 - 1622678400 sec UTC - 10-HYBL - [-] - 1622678400 sec UTC - 1.1745132724172e-05 - 1.1539705123401e-05 - 1.14457525342e-05 - 6.9678470128824e-08 - 100 - - - -45 - 0 - 0 - 24 - 0.0000114458 - - - - 30[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 30 255 -127 -2147483647 - 210 203 0 
255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 30 255 255 255 255 255 255 - 1622678400 sec UTC - 30-HYBL - [-] - 1622678400 sec UTC - 1.068031849627e-07 - 8.8144559515281e-08 - 7.4302164421169e-08 - 9.4481458808801e-09 - 100 - - - -48 - 0 - 0 - 24 - 0.0000000743 - - - - 10[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 10 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 10 255 255 255 255 255 255 - 1622764800 sec UTC - 10-HYBL - [-] - 1622764800 sec UTC - 1.2193680959172e-05 - 1.1760300362674e-05 - 1.147888997366e-05 - 1.7730224129066e-07 - 100 - - - -44 - 0 - 0 - 24 - 0.0000114789 - - - - 30[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 30 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 30 255 255 255 255 255 255 - 1622764800 sec UTC - 30-HYBL - [-] - 1622764800 sec UTC - 1.1384071285647e-07 - 9.2106310315715e-08 - 7.2270665896212e-08 - 9.5383389050812e-09 - 100 - - - -48 - 0 - 0 - 24 - 0.0000000723 - - - - 10[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 10-ISBL - [-] - 1622678400 sec UTC - 1.6276792848657e-05 - 1.6106599578423e-05 - 1.583885295986e-05 - 1.0153528902132e-07 - 100 - - - - 20[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 20-ISBL - [-] - 1622678400 sec UTC - 1.2929541298945e-05 - 1.2611470742276e-05 - 1.2212967703817e-05 - 1.4723476239413e-07 - 100 - - - - 50[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 50-ISBL - [-] - 1622678400 sec UTC - 2.8687002213701e-06 - 2.5890412616161e-06 - 2.299082780155e-06 - 1.428912787031e-07 - 100 - - - - 100[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 100-ISBL - [-] - 1622678400 sec UTC - 2.502025040485e-07 - 1.9998846863352e-07 - 1.6797713442429e-07 - 1.9060562971876e-08 - 100 - - - - 10[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622764800 sec UTC - 10-ISBL - [-] - 1622764800 sec UTC - 1.6031418454077e-05 - 1.5874708642328e-05 - 1.5749257727293e-05 - 7.265758657701e-08 - 100 - - - - 20[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622764800 sec UTC - 20-ISBL - [-] - 1622764800 sec UTC - 1.3027401109866e-05 - 1.2695418569578e-05 - 1.1947801795031e-05 - 2.1390172203242e-07 - 100 - - - - 50[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622764800 sec UTC - 50-ISBL - [-] - 1622764800 sec UTC - 2.9717652978434e-06 - 2.6961222537076e-06 - 2.4221099010902e-06 - 1.215710670366e-07 - 100 - - - - 100[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622764800 sec UTC - 100-ISBL - [-] - 1622764800 sec UTC - 2.741275579865e-07 - 2.0168293846781e-07 - 1.650793706176e-07 - 2.4385349641867e-08 - 100 - - - diff --git 
a/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650626995.380916-11651-14-ca8e7236-16ca-4e11-919d-bdbd5a51da35.grb.aux.xml b/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650626995.380916-11651-14-ca8e7236-16ca-4e11-919d-bdbd5a51da35.grb.aux.xml deleted file mode 100644 index 8b9237893..000000000 --- a/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650626995.380916-11651-14-ca8e7236-16ca-4e11-919d-bdbd5a51da35.grb.aux.xml +++ /dev/null @@ -1,342 +0,0 @@ - - - 1[-] HYBL="Hybrid level" - - - 1.136666106290528e-06 - 1.200369887769461e-06 - 196 - 0 - 0 - 1|1|0|1|1|2|7|1|0|0|0|1|1|1|2|1|3|3|3|4|6|2|1|0|0|0|0|0|0|0|0|0|0|0|0|1|0|1|1|2|2|6|5|1|1|2|0|1|0|1|1|0|1|1|1|1|0|1|0|1|1|2|7|1|0|0|0|0|0|0|0|0|0|1|0|1|0|1|1|1|6|2|1|0|0|0|0|0|0|1|0|1|0|1|1|1|6|2|0|1|0|0|0|0|0|1|0|1|0|1|1|1|5|2|1|1|0|0|0|0|0|1|0|1|0|1|1|1|4|3|1|1|0|0|0|0|1|0|0|1|1|0|1|2|3|3|1|1|0|0|0|0|1|0|1|0|1|1|1|1|2|4|1|1|0|0|0|1|0|1|0|1|0|1|1|1|2|3|2|1|0|0|1|0|1|0|1|0|1|0|1|1|2|3|2|1 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 1 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 1 255 255 255 255 255 255 - 1622678400 sec UTC - 1-HYBL - [-] - 1622678400 sec UTC - 1.2002082030449e-06 - 1.1662431895312e-06 - 1.1368277910151e-06 - 1.9422780853555e-08 - 100 - - - -47 - 0 - 0 - 24 - 0.0000011368 - - - - 1[-] HYBL="Hybrid level" - - - 1.143818000822908e-06 - 1.20314421841223e-06 - 196 - 0 - 0 - 3|2|1|1|2|5|0|0|0|0|1|3|1|2|2|4|1|0|3|2|1|1|3|4|0|0|0|0|0|0|0|0|4|1|1|1|2|4|1|0|0|0|0|0|0|0|0|0|0|0|0|3|3|5|7|1|1|0|2|2|4|0|0|0|0|0|0|0|0|0|0|0|3|1|1|1|0|1|1|2|4|0|0|0|0|0|0|0|3|1|1|0|1|0|1|1|2|2|2|0|0|0|0|0|3|1|1|0|1|0|1|1|0|1|3|2|0|0|0|0|0|3|1|1|1|0|1|0|1|1|0|4|1|0|0|0|0|1|3|1|0|1|1|0|1|1|0|2|3|0|0|0|0|0|3|2|0|1|1|0|1|1|0|2|3|0|0|0|0|0|0|4|1|1|0|1|1|0|1|4|1|0|0|0|0|0|0|4|1|1|0|1|1|0|3|3 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 1 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 1 255 255 255 255 255 255 - 1622764800 sec UTC - 1-HYBL - [-] - 1622764800 sec UTC - 1.2029936442559e-06 - 1.1711197875953e-06 - 1.1439685749792e-06 - 1.824681247154e-08 - 100 - - - -48 - 0 - 0 - 24 - 0.0000011440 - - - - 10[-] HYBL="Hybrid level" - - - 1.144498880922602e-05 - 1.174589644914685e-05 - 196 - 0 - 0 - 1|0|0|0|3|0|0|3|2|2|0|0|2|0|1|2|2|2|1|1|3|3|2|1|1|3|5|2|4|5|3|4|5|2|4|3|6|0|2|4|2|1|0|1|3|3|1|3|1|4|2|2|2|2|1|2|4|0|2|1|3|0|0|2|3|2|1|1|0|0|1|2|1|3|0|1|1|1|0|1|1|1|0|2|1|0|0|0|1|1|2|2|1|0|1|1|0|1|1|2|0|2|0|2|0|3|0|0|1|0|1|0|0|0|1|0|1|0|0|0|0|0|0|0|1|0|0|1|1|0|0|0|0|0|0|0|0|0|2|0|1|0|0|0|0|0|1|1|0|0|1|0|0|0|0|0|0|0|1|0|0|0|0|1|1|1|0|0|1|0|0|0|1|1|0|0|0|0|0|1|1|1|0|0|0|0|0|1|2|0|0|0|0|0|0|1 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 10 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 10 255 255 255 255 255 255 - 1622678400 sec UTC - 10-HYBL - 
[-] - 1622678400 sec UTC - 1.1745132724172e-05 - 1.1539705123401e-05 - 1.14457525342e-05 - 6.9678470128824e-08 - 100 - - - -45 - 0 - 0 - 24 - 0.0000114458 - - - - 30[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 30 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 30 255 255 255 255 255 255 - 1622678400 sec UTC - 30-HYBL - [-] - 1622678400 sec UTC - 1.068031849627e-07 - 8.8144559515281e-08 - 7.4302164421169e-08 - 9.4481458808801e-09 - 100 - - - -48 - 0 - 0 - 24 - 0.0000000743 - - - - 10[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 10 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 10 255 255 255 255 255 255 - 1622764800 sec UTC - 10-HYBL - [-] - 1622764800 sec UTC - 1.2193680959172e-05 - 1.1760300362674e-05 - 1.147888997366e-05 - 1.7730224129066e-07 - 100 - - - -44 - 0 - 0 - 24 - 0.0000114789 - - - - 30[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 30 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 30 255 255 255 255 255 255 - 1622764800 sec UTC - 30-HYBL - [-] - 1622764800 sec UTC - 1.1384071285647e-07 - 9.2106310315715e-08 - 7.2270665896212e-08 - 9.5383389050812e-09 - 100 - - - -48 - 0 - 0 - 24 - 0.0000000723 - - - - 10[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 10-ISBL - [-] - 1622678400 sec UTC - 1.6276792848657e-05 - 1.6106599578423e-05 - 1.583885295986e-05 - 1.0153528902132e-07 - 100 - - - - 20[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 20-ISBL - [-] - 1622678400 sec UTC - 1.2929541298945e-05 - 1.2611470742276e-05 - 1.2212967703817e-05 - 1.4723476239413e-07 - 100 - - - - 50[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 50-ISBL - [-] - 1622678400 sec UTC - 2.8687002213701e-06 - 2.5890412616161e-06 - 2.299082780155e-06 - 1.428912787031e-07 - 100 - - - - 100[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 100-ISBL - [-] - 1622678400 sec UTC - 2.502025040485e-07 - 1.9998846863352e-07 - 1.6797713442429e-07 - 1.9060562971876e-08 - 100 - - - - 10[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622764800 sec UTC - 10-ISBL - [-] - 1622764800 sec UTC - 1.6031418454077e-05 - 1.5874708642328e-05 - 1.5749257727293e-05 - 7.265758657701e-08 - 100 - - - - 20[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622764800 sec UTC - 20-ISBL - [-] - 1622764800 sec UTC - 1.3027401109866e-05 - 1.2695418569578e-05 - 1.1947801795031e-05 - 2.1390172203242e-07 - 100 - - - - 50[hPa] ISBL (Isobaric 
surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622764800 sec UTC - 50-ISBL - [-] - 1622764800 sec UTC - 2.9717652978434e-06 - 2.6961222537076e-06 - 2.4221099010902e-06 - 1.215710670366e-07 - 100 - - - - 100[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622764800 sec UTC - 100-ISBL - [-] - 1622764800 sec UTC - 2.741275579865e-07 - 2.0168293846781e-07 - 1.650793706176e-07 - 2.4385349641867e-08 - 100 - - - diff --git a/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650627030.319457-19905-15-0ede0273-89e3-4100-a0f2-48916ca607ed.grb.aux.xml b/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650627030.319457-19905-15-0ede0273-89e3-4100-a0f2-48916ca607ed.grb.aux.xml deleted file mode 100644 index 8b9237893..000000000 --- a/src/test/resources/binary/grib-cams/adaptor.mars.internal-1650627030.319457-19905-15-0ede0273-89e3-4100-a0f2-48916ca607ed.grb.aux.xml +++ /dev/null @@ -1,342 +0,0 @@ - - - 1[-] HYBL="Hybrid level" - - - 1.136666106290528e-06 - 1.200369887769461e-06 - 196 - 0 - 0 - 1|1|0|1|1|2|7|1|0|0|0|1|1|1|2|1|3|3|3|4|6|2|1|0|0|0|0|0|0|0|0|0|0|0|0|1|0|1|1|2|2|6|5|1|1|2|0|1|0|1|1|0|1|1|1|1|0|1|0|1|1|2|7|1|0|0|0|0|0|0|0|0|0|1|0|1|0|1|1|1|6|2|1|0|0|0|0|0|0|1|0|1|0|1|1|1|6|2|0|1|0|0|0|0|0|1|0|1|0|1|1|1|5|2|1|1|0|0|0|0|0|1|0|1|0|1|1|1|4|3|1|1|0|0|0|0|1|0|0|1|1|0|1|2|3|3|1|1|0|0|0|0|1|0|1|0|1|1|1|1|2|4|1|1|0|0|0|1|0|1|0|1|0|1|1|1|2|3|2|1|0|0|1|0|1|0|1|0|1|0|1|1|2|3|2|1 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 1 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 1 255 255 255 255 255 255 - 1622678400 sec UTC - 1-HYBL - [-] - 1622678400 sec UTC - 1.2002082030449e-06 - 1.1662431895312e-06 - 1.1368277910151e-06 - 1.9422780853555e-08 - 100 - - - -47 - 0 - 0 - 24 - 0.0000011368 - - - - 1[-] HYBL="Hybrid level" - - - 1.143818000822908e-06 - 1.20314421841223e-06 - 196 - 0 - 0 - 3|2|1|1|2|5|0|0|0|0|1|3|1|2|2|4|1|0|3|2|1|1|3|4|0|0|0|0|0|0|0|0|4|1|1|1|2|4|1|0|0|0|0|0|0|0|0|0|0|0|0|3|3|5|7|1|1|0|2|2|4|0|0|0|0|0|0|0|0|0|0|0|3|1|1|1|0|1|1|2|4|0|0|0|0|0|0|0|3|1|1|0|1|0|1|1|2|2|2|0|0|0|0|0|3|1|1|0|1|0|1|1|0|1|3|2|0|0|0|0|0|3|1|1|1|0|1|0|1|1|0|4|1|0|0|0|0|1|3|1|0|1|1|0|1|1|0|2|3|0|0|0|0|0|3|2|0|1|1|0|1|1|0|2|3|0|0|0|0|0|0|4|1|1|0|1|1|0|1|4|1|0|0|0|0|0|0|4|1|1|0|1|1|0|3|3 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 1 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 1 255 255 255 255 255 255 - 1622764800 sec UTC - 1-HYBL - [-] - 1622764800 sec UTC - 1.2029936442559e-06 - 1.1711197875953e-06 - 1.1439685749792e-06 - 1.824681247154e-08 - 100 - - - -48 - 0 - 0 - 24 - 0.0000011440 - - - - 10[-] HYBL="Hybrid level" - - - 1.144498880922602e-05 - 1.174589644914685e-05 - 196 - 0 - 0 - 
1|0|0|0|3|0|0|3|2|2|0|0|2|0|1|2|2|2|1|1|3|3|2|1|1|3|5|2|4|5|3|4|5|2|4|3|6|0|2|4|2|1|0|1|3|3|1|3|1|4|2|2|2|2|1|2|4|0|2|1|3|0|0|2|3|2|1|1|0|0|1|2|1|3|0|1|1|1|0|1|1|1|0|2|1|0|0|0|1|1|2|2|1|0|1|1|0|1|1|2|0|2|0|2|0|3|0|0|1|0|1|0|0|0|1|0|1|0|0|0|0|0|0|0|1|0|0|1|1|0|0|0|0|0|0|0|0|0|2|0|1|0|0|0|0|0|1|1|0|0|1|0|0|0|0|0|0|0|1|0|0|0|0|1|1|1|0|0|1|0|0|0|1|1|0|0|0|0|0|1|1|1|0|0|0|0|0|1|2|0|0|0|0|0|0|1 - - - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 10 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 10 255 255 255 255 255 255 - 1622678400 sec UTC - 10-HYBL - [-] - 1622678400 sec UTC - 1.1745132724172e-05 - 1.1539705123401e-05 - 1.14457525342e-05 - 6.9678470128824e-08 - 100 - - - -45 - 0 - 0 - 24 - 0.0000114458 - - - - 30[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-03T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 30 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 30 255 255 255 255 255 255 - 1622678400 sec UTC - 30-HYBL - [-] - 1622678400 sec UTC - 1.068031849627e-07 - 8.8144559515281e-08 - 7.4302164421169e-08 - 9.4481458808801e-09 - 100 - - - -48 - 0 - 0 - 24 - 0.0000000743 - - - - 10[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 10 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 10 255 255 255 255 255 255 - 1622764800 sec UTC - 10-HYBL - [-] - 1622764800 sec UTC - 1.2193680959172e-05 - 1.1760300362674e-05 - 1.147888997366e-05 - 1.7730224129066e-07 - 100 - - - -44 - 0 - 0 - 24 - 0.0000114789 - - - - 30[-] HYBL="Hybrid level" - - (prodType 192, cat 210, subcat 203) [-] - 192 - unknown - 0 sec - CENTER=98(ECMWF) SUBCENTER=0 MASTER_TABLE=5 LOCAL_TABLE=0 SIGNF_REF_TIME=1(Start_of_Forecast) REF_TIME=2021-06-04T00:00:00Z PROD_STATUS=0(Operational) TYPE=0(Analysis) - 0 - 210 203 0 255 146 65535 255 1 0 105 0 30 255 -127 -2147483647 - 210 203 0 255 146 255 255 255 1 0 0 0 0 105 0 0 0 0 30 255 255 255 255 255 255 - 1622764800 sec UTC - 30-HYBL - [-] - 1622764800 sec UTC - 1.1384071285647e-07 - 9.2106310315715e-08 - 7.2270665896212e-08 - 9.5383389050812e-09 - 100 - - - -48 - 0 - 0 - 24 - 0.0000000723 - - - - 10[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 10-ISBL - [-] - 1622678400 sec UTC - 1.6276792848657e-05 - 1.6106599578423e-05 - 1.583885295986e-05 - 1.0153528902132e-07 - 100 - - - - 20[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 20-ISBL - [-] - 1622678400 sec UTC - 1.2929541298945e-05 - 1.2611470742276e-05 - 1.2212967703817e-05 - 1.4723476239413e-07 - 100 - - - - 50[hPa] ISBL (Isobaric surface) - - undefined [-] - var203 of table 210 of center ECMWF - 0 sec - 1622678400 sec UTC - 50-ISBL - [-] - 1622678400 sec UTC - 2.8687002213701e-06 - 2.5890412616161e-06 - 2.299082780155e-06 - 
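[Editor's note: these .aux.xml files are GDAL PAM ("persistable auxiliary metadata") sidecars, written next to rasters that GDAL cannot annotate in place, so removing them from the test resources is reasonable: GDAL recreates them on demand whenever statistics are recomputed. A minimal sketch of that behaviour using the GDAL Java bindings; the file name below is a placeholder, not a path from this PR:

import org.gdal.gdal.gdal
import org.gdal.gdalconst.gdalconstConstants

object RegeneratePamStats extends App {
  gdal.AllRegister()
  // Placeholder path; any of the grib-cams test rasters would do.
  val ds = gdal.Open("src/test/resources/binary/grib-cams/example.grb", gdalconstConstants.GA_ReadOnly)
  val band = ds.GetRasterBand(1)
  val (min, max, mean, std) = (Array(0.0), Array(0.0), Array(0.0), Array(0.0))
  // force = true recomputes the statistics and persists them to example.grb.aux.xml.
  band.GetStatistics(false, true, min, max, mean, std)
  println(s"min=${min(0)} max=${max(0)} mean=${mean(0)} stddev=${std(0)}")
  ds.delete()
}]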
diff --git a/src/test/scala/com/databricks/labs/mosaic/core/geometry/multipoint/TestMultiPointJTS.scala b/src/test/scala/com/databricks/labs/mosaic/core/geometry/multipoint/TestMultiPointJTS.scala index f26ea529e..623466d36 100644 --- a/src/test/scala/com/databricks/labs/mosaic/core/geometry/multipoint/TestMultiPointJTS.scala +++ b/src/test/scala/com/databricks/labs/mosaic/core/geometry/multipoint/TestMultiPointJTS.scala @@ -1,10 +1,12 @@ package com.databricks.labs.mosaic.core.geometry.multipoint +import com.databricks.labs.mosaic.core.geometry.linestring.{MosaicLineString, MosaicLineStringJTS} +import com.databricks.labs.mosaic.core.geometry.multipolygon.MosaicMultiPolygonJTS import com.databricks.labs.mosaic.core.geometry.point.MosaicPointJTS import com.databricks.labs.mosaic.core.geometry.polygon.MosaicPolygonJTS +import com.databricks.labs.mosaic.core.raster.operator.rasterize.GDALRasterize import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers._ - import org.apache.spark.sql.catalyst.InternalRow //noinspection ScalaRedundantCast @@ -121,4 +123,29 @@ class TestMultiPointJTS extends AnyFlatSpec { multiPoint.mapXY({ (x: Double, y: Double) => (x * 2, y / 2) }).getSpatialReference shouldBe srid } + private val emptyLineString = MosaicLineStringJTS.fromWKT("LINESTRING EMPTY").asInstanceOf[MosaicLineString] + + "MosaicMultiPointJTS" should "perform an unconstrained Delaunay triangulation" in { + + val multiPoint = MosaicMultiPointJTS.fromWKT("MULTIPOINT Z (2 1 0, 3 2 1, 1 3 3, 0 2 2)").asInstanceOf[MosaicMultiPointJTS] + val triangulated = multiPoint.triangulate(Seq(emptyLineString), 0.00, 0.01) + MosaicMultiPolygonJTS.fromSeq(triangulated).toWKT shouldBe "MULTIPOLYGON Z(((0 2 2, 2 1 0, 1 3 3, 0 2 2)), ((1 3 3, 2 1 0, 3 2 1, 1 3 3)))" + } + + "MosaicMultiPointJTS" should "generate an equally spaced grid of points for use in elevation interpolation" in { + val origin = MosaicPointJTS.fromWKT("POINT (0 0)").asInstanceOf[MosaicPointJTS] + val grid = MosaicMultiPointJTS.fromWKT("MULTIPOINT (0 0, 0 1, 0 2, 1 0, 1 1, 1 2, 2 0, 2 1, 2 2)").asInstanceOf[MosaicMultiPointJTS] + val generatedGrid = grid.pointGrid(origin, 3, 3, 1.0, 1.0) + generatedGrid.toWKT shouldBe grid.toWKT + } + + 
"MosaicMultiPointJTS" should "perform elevation interpolation" in { + val multiPoint = MosaicMultiPointJTS.fromWKT("MULTIPOINT Z (2.5 1.5 0, 3.5 2.5 1, 1.5 3.5 3, 0.5 2.5 2)").asInstanceOf[MosaicMultiPointJTS] + val origin = MosaicPointJTS.fromWKT("POINT (0 0)").asInstanceOf[MosaicPointJTS] + val gridPoints = multiPoint.pointGrid(origin, 5, 5, 1, 1).intersection(multiPoint.convexHull).asInstanceOf[MosaicMultiPointJTS] + val z = multiPoint.interpolateElevation(Seq(emptyLineString), gridPoints, 0.00, 0.01) + z.toWKT shouldBe "MULTIPOINT Z((1 3 2.5), (2 2 0.8333333333333334), (2 3 2.1666666666666665), (3 2 0.5))" + z.asSeq.map(_.getZ) shouldBe Seq(2.5, 0.8333333333333334, 2.1666666666666665, 0.5) + } + } diff --git a/src/test/scala/com/databricks/labs/mosaic/core/raster/TestRasterGDAL.scala b/src/test/scala/com/databricks/labs/mosaic/core/raster/TestRasterGDAL.scala index 40124029a..642b31281 100644 --- a/src/test/scala/com/databricks/labs/mosaic/core/raster/TestRasterGDAL.scala +++ b/src/test/scala/com/databricks/labs/mosaic/core/raster/TestRasterGDAL.scala @@ -33,11 +33,11 @@ class TestRasterGDAL extends SharedSparkSessionGDAL { resultExecutors.foreach(s => s should include("GDAL")) } - test("Verify that checkpoint is not used.") { - spark.conf.get(MOSAIC_TEST_MODE) shouldBe "true" - MosaicGDAL.isUseCheckpoint shouldBe false - } - +// test("Verify that checkpoint is not used.") { +// spark.conf.get(MOSAIC_TEST_MODE) shouldBe "true" +// MosaicGDAL.isUseCheckpoint shouldBe false +// } +// test("Read raster metadata from GeoTIFF file.") { assume(System.getProperty("os.name") == "Linux") diff --git a/src/test/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/TestGDALRasterize.scala b/src/test/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/TestGDALRasterize.scala new file mode 100644 index 000000000..c35e399ce --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/TestGDALRasterize.scala @@ -0,0 +1,48 @@ +package com.databricks.labs.mosaic.core.raster.operator.rasterize + +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class TestGDALRasterize extends AnyFunSuite { + + // https://oeis.org/A341198 + val points: Seq[String] = Seq( + "POINT Z (0 2 1)", "POINT Z (1 2 5)", "POINT Z (2 2 37)", + "POINT Z (0 1 21)", "POINT Z (1 1 61)", "POINT Z (2 1 129)", + "POINT Z (0 0 97)", "POINT Z (1 0 177)", "POINT Z (2 0 221)", + ) + val pointGeoms: Seq[MosaicGeometry] = points.map(p => GeometryAPI.apply("JTS").geometry(p, "WKT")) + + test("TestGDALRasterize: should rasterize point geometries using Z") { + + pointGeoms.foreach(_.setSpatialReference(4326)) + + val origin = "POINT (0 2)" // top left-hand corner of north-up image + val originGeom = GeometryAPI.apply("JTS").geometry(origin, "WKT").asInstanceOf[MosaicPoint] + originGeom.setSpatialReference(4326) + + val raster = GDALRasterize.executeRasterize(pointGeoms, None, originGeom, 3, 3, 1.0, -1.0) + + val result = raster.getBand(1).values + result shouldBe Seq(1, 5, 37, 21, 61, 129, 97, 177, 221) + } + + test("TestGDALRasterize: should rasterize a point geometry using Z in a non-WGS84 projection") { + pointGeoms.foreach(_.setSpatialReference(27700)) + val translatedPoints = pointGeoms.map(_.translate(348000.0, 462000.0)) + + val origin = "POINT (0 2)" 
// top left-hand corner of north-up image + val originGeom = GeometryAPI("JTS").geometry(origin, "WKT") + .translate(348000.0, 462000.0) + .asInstanceOf[MosaicPoint] + originGeom.setSpatialReference(27700) + + val raster = GDALRasterize.executeRasterize(translatedPoints, None, originGeom, 3, 3, 1.0, -1.0) + + val result = raster.getBand(1).values + result shouldBe Seq(1, 5, 37, 21, 61, 129, 97, 177, 221) + } +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationBehaviours.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationBehaviours.scala new file mode 100644 index 000000000..42c719a31 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationBehaviours.scala @@ -0,0 +1,122 @@ +package com.databricks.labs.mosaic.expressions.geometry + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.functions.MosaicContext +import com.databricks.labs.mosaic.functions.MosaicRegistryBehaviors.mosaicContext +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.types._ +import org.scalatest.matchers.must.Matchers.noException +import org.scalatest.matchers.should.Matchers._ + +trait ST_InterpolateElevationBehaviours extends QueryTest { + + val pointsPath = "src/test/resources/binary/elevation/sd46_dtm_point.shp" + val linesPath = "src/test/resources/binary/elevation/sd46_dtm_breakline.shp" + val outputRegion = "POLYGON((348000 462000, 348000 461000, 349000 461000, 349000 462000, 348000 462000))" + val buffer = 50.0 + val xWidth = 1000 + val yWidth = 1000 + val xSize = 1.0 + val ySize = -1.0 + val mergeTolerance = 0.0 + val snapTolerance = 0.01 + val origin = "POINT(348000 462000)" + + def simpleInterpolationBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + + val mc = mosaicContext + import mc.functions._ + val sc = spark + import sc.implicits._ + mc.register(spark) + + val points = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(pointsPath) + .withColumn("filterGeom", st_geomfromwkt(lit(outputRegion))) + .where(st_intersects($"geom_0", st_buffer($"filterGeom", lit(buffer)))) + + val result = points + .groupBy() + .agg(collect_list($"geom_0").as("masspoints")) + .withColumn("breaklines", array().cast(ArrayType(StringType))) + .withColumn("mergeTolerance", lit(mergeTolerance)) + .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("origin", st_geomfromwkt(lit(origin))) + .withColumn("grid_size_x", lit(xWidth)) + .withColumn("grid_size_y", lit(yWidth)) + .withColumn("pixel_size_x", lit(xSize)) + .withColumn("pixel_size_y", lit(ySize)) + .withColumn("elevation", st_interpolateelevation( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", + $"origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y")) + .drop( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"origin", + $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + ) + noException should be thrownBy result.collect() + result.count() shouldBe 1000000L + } + + def conformingInterpolationBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + + val mc = mosaicContext + import mc.functions._ + val sc = spark + import sc.implicits._ + mc.register(spark) + + val points = MosaicContext.read + .option("asWKB", "true") 
+ .format("multi_read_ogr") + .load(pointsPath) + .withColumn("filterGeom", st_geomfromwkt(lit(outputRegion))) + .where(st_intersects($"geom_0", st_buffer($"filterGeom", lit(buffer)))) + + val linesDf = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(linesPath) + .where(st_geometrytype($"geom_0") === "LINESTRING") + .withColumn("filterGeom", st_geomfromwkt(lit(outputRegion))) + .where(st_intersects($"geom_0", st_buffer($"filterGeom", lit(buffer)))) + .groupBy() + .agg(collect_list($"geom_0").as("breaklines")) + + val result = points + .groupBy() + .agg(collect_list($"geom_0").as("masspoints")) + .crossJoin(linesDf) + .withColumn("mergeTolerance", lit(mergeTolerance)) + .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("origin", st_geomfromwkt(lit(origin))) + .withColumn("grid_size_x", lit(xWidth)) + .withColumn("grid_size_y", lit(yWidth)) + .withColumn("pixel_size_x", lit(xSize)) + .withColumn("pixel_size_y", lit(ySize)) + .withColumn("interpolated_grid_point", st_interpolateelevation( + $"masspoints", $"breaklines",$"mergeTolerance", $"snapTolerance", + $"origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y")) + .withColumn("elevation", st_z($"interpolated_grid_point")) + .drop( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"origin", + $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + ) + .cache() + noException should be thrownBy result.collect() + result.count() shouldBe 1000000L + val targetRow = result + .orderBy( + st_y($"interpolated_grid_point").asc, + st_x($"interpolated_grid_point").desc + ) + .first() + targetRow.getAs[Double]("elevation") shouldBe 63.55 +- 0.1 // rough reckoning from examining the reference raster + } + +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationTest.scala new file mode 100644 index 000000000..3a7d3f912 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationTest.scala @@ -0,0 +1,11 @@ +package com.databricks.labs.mosaic.expressions.geometry + +import com.databricks.labs.mosaic.core.geometry.api.JTS +import com.databricks.labs.mosaic.core.index.H3IndexSystem +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession + +case class ST_InterpolateElevationTest() extends QueryTest with SharedSparkSession with ST_InterpolateElevationBehaviours { + test("Testing ST_InterpolateElevation (H3, JTS) to produce interpolated grid elevations on an unconstrained triangulation") { simpleInterpolationBehavior(H3IndexSystem, JTS)} + test("Testing ST_InterpolateElevation (H3, JTS) to produce interpolated grid elevations on an conforming triangulation") { conformingInterpolationBehavior(H3IndexSystem, JTS)} +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala new file mode 100644 index 000000000..71e79ca86 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala @@ -0,0 +1,88 @@ +package com.databricks.labs.mosaic.expressions.geometry + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.functions.MosaicContext +import 
diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala new file mode 100644 index 000000000..71e79ca86 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala @@ -0,0 +1,88 @@ +package com.databricks.labs.mosaic.expressions.geometry + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.functions.MosaicContext +import com.databricks.labs.mosaic.functions.MosaicRegistryBehaviors.mosaicContext +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.functions.{array, collect_list, explode, lit} +import org.apache.spark.sql.types._ +import org.scalatest.matchers.must.Matchers.noException +import org.scalatest.matchers.should.Matchers._ + + +trait ST_TriangulateBehaviours extends QueryTest { + + val pointsPath = "src/test/resources/binary/elevation/sd46_dtm_point.shp" + val linesPath = "src/test/resources/binary/elevation/sd46_dtm_breakline.shp" + val outputRegion = "POLYGON((348000 462000, 348000 461000, 349000 461000, 349000 462000, 348000 462000))" + val buffer = 50.0 + val mergeTolerance = 0.0 + val snapTolerance = 0.01 + + def simpleTriangulateBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + + val mc = mosaicContext + import mc.functions._ + val sc = spark + import sc.implicits._ + mc.register(spark) + + val points = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(pointsPath) + .withColumn("filterGeom", st_geomfromwkt(lit(outputRegion))) + .where(st_intersects($"geom_0", st_buffer($"filterGeom", lit(buffer)))) + + val result = points + .groupBy() + .agg(collect_list($"geom_0").as("masspoints")) + .withColumn("breaklines", array().cast(ArrayType(StringType))) + .withColumn("mesh", st_triangulate($"masspoints", $"breaklines", lit(mergeTolerance), lit(snapTolerance))) + .drop($"masspoints") + noException should be thrownBy result.collect() + result.count() shouldBe 4445 + + } + + def conformingTriangulateBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + + val mc = mosaicContext + import mc.functions._ + val sc = spark + import sc.implicits._ + mc.register(spark) + + val points = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(pointsPath) + .withColumn("filterGeom", st_geomfromwkt(lit(outputRegion))) + .where(st_intersects($"geom_0", st_buffer($"filterGeom", lit(buffer)))) + + val breaklines = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(linesPath) + .withColumn("filterGeom", st_geomfromwkt(lit(outputRegion))) + .where(st_intersects($"geom_0", st_buffer($"filterGeom", lit(buffer)))) + + val linesDf = breaklines + .where(st_geometrytype($"geom_0") === "LINESTRING") + .groupBy() + .agg(collect_list($"geom_0").as("breaklines")) + + val result = points + .groupBy() + .agg(collect_list($"geom_0").as("masspoints")) + .crossJoin(linesDf) + .withColumn("mesh", st_triangulate($"masspoints", $"breaklines", lit(mergeTolerance), lit(snapTolerance))) + .drop($"masspoints", $"breaklines") + + noException should be thrownBy result.collect() + result.count() should be > points.count() + + } + +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateTest.scala new file mode 100644 index 000000000..797a787cd --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateTest.scala @@ -0,0 +1,12 @@ +package com.databricks.labs.mosaic.expressions.geometry + +import com.databricks.labs.mosaic.core.geometry.api.JTS +import com.databricks.labs.mosaic.core.index.H3IndexSystem +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession + +class ST_TriangulateTest extends QueryTest with SharedSparkSession with ST_TriangulateBehaviours { + test("Testing ST_Triangulate (H3, JTS) to 
produce an unconstrained triangulation") { simpleTriangulateBehavior(H3IndexSystem, JTS)} + test("Testing ST_Triangulate (H3, JTS) to produce a conforming triangulation") { conformingTriangulateBehavior(H3IndexSystem, JTS)} + +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala index d2c3f89d2..5819d8609 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala @@ -77,6 +77,44 @@ trait ST_UnionBehaviors extends QueryTest { results.zip(expected).foreach { case (l, r) => l.equalsTopo(r) shouldEqual true } } + def unionAggPointsBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("ERROR") + val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark + import mc.functions._ + import sc.implicits._ + mc.register(spark) + + val polygonRows = List( + List(1L, "POINT Z (10 10 10)"), + List(1L, "POINT Z (15 15 15)") + ) + val rows = polygonRows.map { x => Row(x: _*) } + val rdd = spark.sparkContext.makeRDD(rows) + val schema = StructType( + Seq( + StructField("row_id", LongType), + StructField("polygons", StringType) + ) + ) + + val polygons = spark.createDataFrame(rdd, schema) + val expected = List("MULTIPOINT ((10 10), (15 15))") + .map(mc.getGeometryAPI.geometry(_, "WKT")) + + val results = polygons + .groupBy($"row_id") + .agg( + st_union_agg($"polygons").alias("result") + ) + .select($"result") + .as[Array[Byte]] + .collect() + .map(mc.getGeometryAPI.geometry(_, "WKB")) + + results.zip(expected).foreach { case (l, r) => l.equalsTopo(r) shouldEqual true } + } + def unionCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("ERROR") val mc = MosaicContext.build(indexSystem, geometryAPI) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionTest.scala index ea0ee07c4..f6df111e0 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionTest.scala @@ -27,6 +27,8 @@ class ST_UnionTest extends QueryTest with SharedSparkSession with ST_UnionBehavi test("Testing stUnionAgg (H3, JTS) NO_CODEGEN") { noCodegen { unionAggBehavior(H3IndexSystem, JTS) } } test("Testing stUnionAgg (BNG, JTS) NO_CODEGEN") { noCodegen { unionAggBehavior(BNGIndexSystem, JTS) } } test("Testing stUnion (H3, JTS) CODEGEN compilation") { codegenOnly { unionCodegen(H3IndexSystem, JTS) } } + test("Testing stUnionAgg (H3, JTS) NO_CODEGEN (Points)") { noCodegen { unionAggPointsBehavior(H3IndexSystem, JTS) } } + test("Testing stUnionAgg (BNG, JTS) NO_CODEGEN (Points)") { noCodegen { unionAggPointsBehavior(BNGIndexSystem, JTS) } } test("Testing stUnion (BNG, JTS) CODEGEN compilation") { codegenOnly { unionCodegen(BNGIndexSystem, JTS) } } test("Testing stUnion (H3, JTS) CODEGEN_ONLY") { codegenOnly { unionBehavior(H3IndexSystem, JTS) } } test("Testing stUnion (BNG, JTS) CODEGEN_ONLY") { codegenOnly { unionBehavior(BNGIndexSystem, JTS) } }
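[Editor's note: the new unionAggPointsBehavior above confirms that st_union_agg also collapses point inputs into a multipoint. A minimal standalone sketch, assuming the `mc`/`spark` setup from the previous note:

val unioned = Seq((1L, "POINT Z (10 10 10)"), (1L, "POINT Z (15 15 15)"))
  .toDF("row_id", "geom_wkt")
  .groupBy($"row_id")
  .agg(st_union_agg($"geom_wkt").as("result")) // binary (WKB) result column

The test decodes the WKB and compares with equalsTopo against MULTIPOINT ((10 10), (15 15)), i.e. topological equality only; Z values are not asserted.]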
diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsBehaviours.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsBehaviours.scala new file mode 100644 index 000000000..d3afc1788 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsBehaviours.scala @@ -0,0 +1,177 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.functions.MosaicContext +import org.apache.spark.sql.functions.{array, collect_list, lit} +import org.apache.spark.sql.test.SharedSparkSessionGDAL +import org.apache.spark.sql.types.{ArrayType, StringType} +import org.scalatest.matchers.should.Matchers._ + +trait RST_DTMFromGeomsBehaviours extends SharedSparkSessionGDAL { + + val pointsPath = "src/test/resources/binary/elevation/sd46_dtm_point.shp" + val linesPath = "src/test/resources/binary/elevation/sd46_dtm_breakline.shp" + val mergeTolerance = 0.0 + val snapTolerance = 0.01 + + def simpleRasterizeTest(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + + val mc = MosaicContext.build(indexSystem, geometryAPI) + import mc.functions._ + val sc = spark + import sc.implicits._ + mc.register(spark) + + val pointsDf = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(pointsPath) + val result = pointsDf + .withColumn("geom_0", st_geomfromwkb($"geom_0")) + .withColumn("geom_0", st_setsrid($"geom_0", lit(27700))) + .groupBy() + .agg(collect_list($"geom_0").as("masspoints")) + .withColumn("breaklines", array().cast(ArrayType(StringType))) + .withColumn("mergeTolerance", lit(mergeTolerance)) + .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("origin", st_point(lit(348000.0), lit(462000.0))) + .withColumn("grid_size_x", lit(1000)) + .withColumn("grid_size_y", lit(1000)) + .withColumn("pixel_size_x", lit(1.0)) + .withColumn("pixel_size_y", lit(-1.0)) + .withColumn("tile", rst_dtmfromgeoms( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", + $"origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y")) + .drop( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"origin", + $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + ).cache() + noException should be thrownBy result.collect() + + } + + def conformedTriangulationRasterizeTest(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + + val mc = MosaicContext.build(indexSystem, geometryAPI) + import mc.functions._ + val sc = spark + import sc.implicits._ + mc.register(spark) + + val outputRegion = "POLYGON((348000 462000, 348000 461000, 349000 461000, 349000 462000, 348000 462000))" + + val pointsDf = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(pointsPath) + + val breaklines = MosaicContext.read.option("asWKB", "true").format("multi_read_ogr").load(linesPath) + + val linesDf = breaklines + .where(st_geometrytype($"geom_0") === "LINESTRING") + .withColumn("filterGeom", st_geomfromwkt(lit(outputRegion))) + .where(st_intersects($"geom_0", st_buffer($"filterGeom", lit(500.0)))) + .groupBy() + .agg(collect_list($"geom_0").as("breaklines")) + + val result = pointsDf + .withColumn("geom_0", st_geomfromwkb($"geom_0")) + .withColumn("geom_0", st_setsrid($"geom_0", lit(27700))) + .withColumn("filterGeom", st_geomfromwkt(lit(outputRegion))) + .where(st_intersects($"geom_0", st_buffer($"filterGeom", lit(500.0)))) + .where(st_x($"geom_0").notEqual(lit(0))) + .where(st_y($"geom_0").notEqual(lit(0))) + .groupBy() + 
.agg(collect_list($"geom_0").as("masspoints")) + .crossJoin(linesDf) + .withColumn("mergeTolerance", lit(mergeTolerance)) + .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("origin", st_point(lit(348000.0), lit(462000.0))) + .withColumn("grid_size_x", lit(1000)) + .withColumn("grid_size_y", lit(1000)) + .withColumn("pixel_size_x", lit(1.0)) + .withColumn("pixel_size_y", lit(-1.0)) + .withColumn("tile", rst_dtmfromgeoms( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", + $"origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y")) + .drop( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"origin", + $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + ).cache() + noException should be thrownBy result.collect() + result.select($"tile").show(truncate = false) + } + + def multiRegionTriangulationRasterizeTest(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + + val mc = MosaicContext.build(indexSystem, geometryAPI) + import mc.functions._ + val sc = spark + import sc.implicits._ + mc.register(spark) + + val outputRegion = "POLYGON ((340000 460000, 350000 460000, 350000 470000, 340000 470000, 340000 460000))" + val rasterBuffer = 500.0 + + val rasterExtentsDf = + List(outputRegion).toDF("wkt") + .withColumn("extent_geom", st_geomfromwkt($"wkt")) + .withColumn("extent_geom", st_setsrid($"extent_geom", lit(27700))) + .withColumn("cells", grid_tessellateexplode($"extent_geom", lit(3))) + .withColumn("extent", st_geomfromwkb($"cells.wkb")) + .withColumn("extent_buffered", st_buffer($"extent", lit(rasterBuffer))) + .withColumn("raster_origin", st_point(st_xmin($"extent"), st_ymax($"extent"))) // top left + .withColumn("raster_origin", st_setsrid($"raster_origin", lit(27700))) + .select("cells.index_id", "extent_buffered", "raster_origin") + + val pointsDf = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(pointsPath) + .where($"geom_0".notEqual(lit("POINT EMPTY"))) + .withColumn("geom", st_geomfromwkb($"geom_0")) + .withColumn("geom", st_setsrid($"geom", lit(27700))) + .crossJoin(rasterExtentsDf) + .where(st_intersects($"geom", $"extent_buffered")) + .groupBy("index_id", "raster_origin") + .agg(collect_list($"geom").as("masspoints")) + + val linesDf = MosaicContext.read + .option("asWKB", "true") + .format("multi_read_ogr") + .load(linesPath) + .where(st_geometrytype($"geom_0") === "LINESTRING") + .withColumn("geom", st_geomfromwkb($"geom_0")) + .withColumn("geom", st_setsrid($"geom", lit(27700))) + .crossJoin(rasterExtentsDf) + .where(st_intersects($"geom", $"extent_buffered")) + .groupBy("index_id", "raster_origin") + .agg(collect_list($"geom").as("breaklines")) + + val inputsDf = pointsDf + .join(linesDf, Seq("index_id", "raster_origin"), "left") + + val result = inputsDf + .repartition(sc.sparkContext.defaultParallelism) + .withColumn("mergeTolerance", lit(mergeTolerance)) + .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("grid_size_x", lit(1000)) + .withColumn("grid_size_y", lit(1000)) + .withColumn("pixel_size_x", lit(1.0)) + .withColumn("pixel_size_y", lit(-1.0)) + .withColumn("tile", rst_dtmfromgeoms( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", + $"raster_origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y")) + .drop( + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"raster_origin", + $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + ).cache() + noException 
should be thrownBy result.collect() + result.count() shouldBe inputsDf.count() + } + +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsTest.scala new file mode 100644 index 000000000..5196e9443 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsTest.scala @@ -0,0 +1,24 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.JTS +import com.databricks.labs.mosaic.core.index.{BNGIndexSystem, H3IndexSystem} +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSessionGDAL + +class RST_DTMFromGeomsTest extends QueryTest with SharedSparkSessionGDAL with RST_DTMFromGeomsBehaviours { + test("Testing RST_DTMFromGeoms for simple triangulation with manual GDAL registration (H3, JTS).") { + assume(System.getProperty("os.name") == "Linux") + simpleRasterizeTest(H3IndexSystem, JTS) + } + test("Testing RST_DTMFromGeoms for conforming triangulation with manual GDAL registration (H3, JTS).") { + assume(System.getProperty("os.name") == "Linux") + conformedTriangulationRasterizeTest(H3IndexSystem, JTS) + } + + registerIgnoredTest("Ignored due to resource / duration")( + test("Testing RST_DTMFromGeoms for conforming triangulation over multiple grid regions with manual GDAL registration (BNG, JTS).") { + assume(System.getProperty("os.name") == "Linux") + multiRegionTriangulationRasterizeTest(BNGIndexSystem, JTS) + } + ) +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TypeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TypeBehaviors.scala new file mode 100644 index 000000000..77bc0a307 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TypeBehaviors.scala @@ -0,0 +1,49 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.functions.MosaicContext +import org.apache.spark.sql.QueryTest +import org.scalatest.matchers.should.Matchers._ + +trait RST_TypeBehaviors extends QueryTest { + + def typeBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("ERROR") + val mc = MosaicContext.build(indexSystem, geometryAPI) + mc.register() + val sc = spark + import mc.functions._ + import sc.implicits._ + + val rastersInMemory = spark.read + .format("gdal") + .option("raster_storage", "in-memory") + .load("src/test/resources/modis/MCD43A4.A2018185.h10v07.006.2018194033728_B01.TIF") + + val df = rastersInMemory + .withColumn("result", rst_type($"tile")) + .select("result") + + rastersInMemory + .createOrReplaceTempView("source") + + noException should be thrownBy spark.sql(""" + |select rst_type(tile) from source + |""".stripMargin) + + noException should be thrownBy rastersInMemory + .withColumn("result", rst_type($"tile")) + .select("result") + + val result = df.first.getSeq[String](0).head + + result shouldBe "Int16" + + an[Exception] should be thrownBy spark.sql(""" + |select rst_type() from source + |""".stripMargin) + + } + +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TypeTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TypeTest.scala new file mode 100644 index 000000000..b35f75133 --- 
/dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TypeTest.scala @@ -0,0 +1,32 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.JTS +import com.databricks.labs.mosaic.core.index.H3IndexSystem +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSessionGDAL + +import scala.util.Try + +class RST_TypeTest extends QueryTest with SharedSparkSessionGDAL with RST_TypeBehaviors { + + private val noCodegen = + withSQLConf( + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString + ) _ + + // Hotfix for SharedSparkSession afterAll cleanup. + override def afterAll(): Unit = Try(super.afterAll()) + + // These tests are not index system nor geometry API specific. + // Only testing one pairing is sufficient. + test("Testing RST_Type with manual GDAL registration (H3, JTS).") { + noCodegen { + assume(System.getProperty("os.name") == "Linux") + typeBehavior(H3IndexSystem, JTS) + } + } + +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateTypeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateTypeBehaviors.scala new file mode 100644 index 000000000..c360545d3 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateTypeBehaviors.scala @@ -0,0 +1,49 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.functions.MosaicContext +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.functions.lit +import org.scalatest.matchers.should.Matchers.{be, convertToAnyShouldWrapper, noException} + +trait RST_UpdateTypeBehaviors extends QueryTest { + + // noinspection MapGetGet + def behavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("ERROR") + val mc = MosaicContext.build(indexSystem, geometryAPI) + mc.register() + val sc = spark + import mc.functions._ + import sc.implicits._ + + val rastersInMemory = spark.read + .format("gdal") + .option("raster_storage", "in-memory") + .load("src/test/resources/modis/MCD43A4.A2018185.h10v07.006.2018194033728_B01.TIF") + + val newType = "Float32" + + val df = rastersInMemory + .withColumn("updated_tile", rst_updatetype($"tile", lit(newType))) + .select(rst_type($"updated_tile").as("new_type")) + + rastersInMemory + .createOrReplaceTempView("source") + + noException should be thrownBy spark.sql(s""" + |select rst_updatetype(tile, '$newType') from source + |""".stripMargin) + + noException should be thrownBy rastersInMemory + .withColumn("tile", rst_updatetype($"tile", lit(newType))) + .select("tile") + + val result = df.first.getSeq[String](0).head + + result shouldBe newType + + } + +} \ No newline at end of file diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateTypeTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateTypeTest.scala new file mode 100644 index 000000000..50140cd6e --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpdateTypeTest.scala @@ -0,0 +1,31 @@ +package com.databricks.labs.mosaic.expressions.raster + +import 
com.databricks.labs.mosaic.core.geometry.api.JTS +import com.databricks.labs.mosaic.core.index.H3IndexSystem +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSessionGDAL + +import scala.util.Try + + class RST_UpdateTypeTest extends QueryTest with SharedSparkSessionGDAL with RST_UpdateTypeBehaviors { + + private val noCodegen = + withSQLConf( + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString + ) _ + + // Hotfix for SharedSparkSession afterAll cleanup. + override def afterAll(): Unit = Try(super.afterAll()) + + // These tests are not index system nor geometry API specific. + // Only testing one pairing is sufficient. + test("Testing RST_UpdateType with manual GDAL registration (H3, JTS).") { + noCodegen { + assume(System.getProperty("os.name") == "Linux") + behavior(H3IndexSystem, JTS) + } + } + }
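[Editor's note: taken together, the suites above cover the full new pipeline: mass points (plus optional breaklines) -> Delaunay triangulation -> grid interpolation -> rasterisation. A condensed sketch of rst_dtmfromgeoms mirroring simpleRasterizeTest, assuming the `mc`/`spark` setup from the earlier notes and a DataFrame `pointsDf` with a WKB `geom_0` column in EPSG:27700:

import org.apache.spark.sql.types.{ArrayType, StringType}

val dtm = pointsDf
  .withColumn("geom_0", st_setsrid(st_geomfromwkb($"geom_0"), lit(27700)))
  .groupBy()
  .agg(collect_list($"geom_0").as("masspoints"))
  .withColumn("breaklines", array().cast(ArrayType(StringType))) // empty: unconstrained triangulation
  .withColumn("tile", rst_dtmfromgeoms(
    $"masspoints", $"breaklines", lit(0.0), lit(0.01), // merge and snap tolerances
    st_point(lit(348000.0), lit(462000.0)),            // top-left raster origin
    lit(1000), lit(1000), lit(1.0), lit(-1.0)))        // 1000x1000 px; negative y pixel size = north-up

rst_dtmfromgeoms is GDAL-backed, which is why the corresponding tests assume a Linux environment.]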