Skip to content

Commit

Permalink
Merge pull request #393 from databrickslabs/feature/grid_tiles
Browse files Browse the repository at this point in the history
Feature/grid tiles
  • Loading branch information
Milos Colic authored Oct 31, 2023
2 parents 2514e50 + 674227c commit 31305ed
Show file tree
Hide file tree
Showing 255 changed files with 10,086 additions and 2,508 deletions.
47 changes: 39 additions & 8 deletions .github/actions/r_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,54 @@ runs:
- name: Setup R build environment
shell: bash
run: |
sudo apt-get update && sudo apt-get install -y curl libcurl4-openssl-dev pkg-config libharfbuzz-dev libfribidi-dev
- name: Download and unpack Spark
sudo apt-get update && sudo apt-get install -y curl libcurl4-openssl-dev pkg-config libharfbuzz-dev libfribidi-dev
- name: Create download location for Spark
shell: bash
run: |
sudo mkdir -p /usr/spark-download/raw
sudo mkdir -p /usr/spark-download/unzipped
sudo mkdir -p /usr/spark-download/raw
sudo chown -R $USER: /usr/spark-download/
wget -P /usr/spark-download/raw https://archive.apache.org/dist/spark/spark-3.2.1/spark-3.2.1-bin-hadoop2.7.tgz
- name: Cache Spark download
id: cache-spark
uses: actions/cache@v3
with:
path: /usr/spark-download/unzipped
key: r_build-spark
- if: ${{ steps.cache-spark.outputs.cache-hit != 'true' }}
name: Download and unpack Spark
shell: bash
run: |
wget -P /usr/spark-download/raw https://archive.apache.org/dist/spark/spark-3.2.1/spark-3.2.1-bin-hadoop2.7.tgz
tar zxvf /usr/spark-download/raw/spark-3.2.1-bin-hadoop2.7.tgz -C /usr/spark-download/unzipped
- name: Build R package
- name: Create R environment
shell: bash
run: |
cd R
sudo mkdir -p /usr/lib/R/site-library
sudo chown -R $USER: /usr/lib/R/site-library
- name: Setup R
uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.R }}
use-public-rspm: true
- name: Install R dependencies
shell: bash
run: |
cd R
Rscript --vanilla install_deps.R
- name: Generate R bindings
shell: bash
run: |
cd R
Rscript --vanilla generate_R_bindings.R ../src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala
- name: Build R docs
shell: bash
run: |
cd R
Rscript --vanilla generate_docs.R
- name: Build R package
shell: bash
run: |
cd R
Rscript --vanilla build_r_package.R
- name: Test R package
shell: bash
Expand Down
13 changes: 11 additions & 2 deletions .github/actions/scala_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,21 @@ runs:
- name: Test and build the scala JAR - skip tests is false
if: inputs.skip_tests == 'false'
shell: bash
run: sudo mvn -q clean install
run: |
pip install databricks-mosaic-gdal==3.4.3
sudo tar -xf /home/runner/.local/lib/python3.8/site-packages/databricks-mosaic-gdal/resources/gdal-3.4.3-filetree.tar.xz -C /
sudo tar -xhf /home/runner/.local/lib/python3.8/site-packages/databricks-mosaic-gdal/resources/gdal-3.4.3-symlinks.tar.xz -C /
sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt clean && sudo apt -o Acquire::Retries=3 update --fix-missing -y
sudo apt-get -o Acquire::Retries=3 update -y
sudo apt-get -o Acquire::Retries=3 install -y gdal-bin=3.4.3+dfsg-1~focal0 libgdal-dev=3.4.3+dfsg-1~focal0 python3-gdal=3.4.3+dfsg-1~focal0
sudo mvn -q clean install
- name: Build the scala JAR - skip tests is true
if: inputs.skip_tests == 'true'
shell: bash
run: sudo mvn -q clean install -DskipTests
run: sudo mvn -q clean install -DskipTests -Dscoverage.skip
- name: Publish test coverage
if: inputs.skip_tests == 'false'
uses: codecov/codecov-action@v1
- name: Copy Scala artifacts to GH Actions run
shell: bash
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/build_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.8.10 ]
python: [ 3.9 ]
spark: [ 3.2.1 ]
R: [ 4.1.2 ]
steps:
Expand All @@ -26,5 +26,7 @@ jobs:
uses: ./.github/actions/scala_build
- name: build python
uses: ./.github/actions/python_build
- name: build R
uses: ./.github/actions/r_build
- name: upload artefacts
uses: ./.github/actions/upload_artefacts
2 changes: 1 addition & 1 deletion .github/workflows/build_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.8.10 ]
python: [ 3.9 ]
spark: [ 3.2.1 ]
R: [ 4.1.2 ]
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.8.10 ]
python: [ 3.9 ]
spark: [ 3.2.1 ]
R: [ 4.1.2 ]
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_scala.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.8.10 ]
python: [ 3.9 ]
spark: [ 3.2.1 ]
R: [ 4.1.2 ]
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pypi-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python: [3.8.10]
python: [3.9]
spark: [3.2.1]
steps:
- name: checkout code
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#IntelliJ files
.idea
*.iml
tmp_

#VSCode files
.vscode
Expand Down Expand Up @@ -65,6 +66,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
/python/test/.run/
spatial_knn

# Translations
*.mo
Expand Down
1 change: 1 addition & 0 deletions R/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
**/.Rhistory
**/*.tar.gz
39 changes: 0 additions & 39 deletions R/build_r_package.R
Original file line number Diff line number Diff line change
@@ -1,54 +1,15 @@
repos = c(
"https://cran.ma.imperial.ac.uk" = "https://cran.ma.imperial.ac.uk"
,"https://www.stats.bris.ac.uk/R" = "https://www.stats.bris.ac.uk/R"
,"https://cran.rstudio.com/" = "https://cran.rstudio.com/"
)

# Probe whether CRAN mirror `x` is reachable.
# Attempts to fetch the mirror's package index; any error or warning
# (unreachable host, bad URL, etc.) is converted to 0 so the caller can
# treat the mirror as down instead of aborting the build.
# Returns: length of the available-packages matrix (> 1 when the mirror
# responded with a usable index), or 0 on failure.
mirror_is_up <- function(x){
out <- tryCatch({
available.packages(contrib.url(x))
}
,error = function(cond){return(0)}
,warning = function(cond){return(0)}
# no cleanup needed; `finally` intentionally does nothing
,finally = function(cond){}
)
return(length(out))
}

mirror_status = lapply(repos, mirror_is_up)
for(repo in names(mirror_status)){
if (mirror_status[[repo]] > 1){
repo <<- repo
break
}
}

install.packages("pkgbuild", repos=repo)
install.packages("roxygen2", repos=repo)
install.packages("sparklyr", repos=repo)
spark_location <- "/usr/spark-download/unzipped/spark-3.2.1-bin-hadoop2.7"
Sys.setenv(SPARK_HOME = spark_location)

library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))


library(pkgbuild)
library(roxygen2)
library(sparklyr)



build_mosaic_bindings <- function(){
# build functions
scala_file_path <- "../src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala"
system_cmd <- paste0(c("Rscript --vanilla generate_R_bindings.R", scala_file_path), collapse = " ")
system(system_cmd)

# build doc
roxygen2::roxygenize("sparkR-mosaic/sparkrMosaic")
roxygen2::roxygenize("sparklyr-mosaic/sparklyrMosaic")


## build package
pkgbuild::build("sparkR-mosaic/sparkrMosaic")
pkgbuild::build("sparklyr-mosaic/sparklyrMosaic")
Expand Down
14 changes: 14 additions & 0 deletions R/generate_docs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Generate roxygen2 documentation for both Mosaic R bindings
# (the SparkR flavour and the sparklyr flavour).
# Assumes the Spark distribution was unpacked to the path below by the
# CI r_build action, and that this script is run from the R/ directory
# (the roxygenize paths are relative to it).
spark_location <- "/usr/spark-download/unzipped/spark-3.2.1-bin-hadoop2.7"
Sys.setenv(SPARK_HOME = spark_location)

# SparkR is loaded from the library bundled inside the Spark distribution,
# not from CRAN.
library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))
library(roxygen2)

# Regenerate the .Rd help files for both binding packages in place.
build_mosaic_docs <- function(){
# build doc
roxygen2::roxygenize("sparkR-mosaic/sparkrMosaic")
roxygen2::roxygenize("sparklyr-mosaic/sparklyrMosaic")

}

build_mosaic_docs()
5 changes: 5 additions & 0 deletions R/install_deps.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Install the R packages required to build, document and package the
# Mosaic R bindings. Uses Posit Public Package Manager pinned to the
# Ubuntu focal snapshot, which serves pre-built Linux binaries
# (faster and more reproducible in CI than a plain CRAN mirror).
options(repos = c(CRAN = "https://packagemanager.posit.co/cran/__linux__/focal/latest"))

# pkgbuild: builds the source tarballs; roxygen2: generates .Rd docs;
# sparklyr: runtime dependency of the sparklyr-mosaic bindings.
install.packages("pkgbuild")
install.packages("roxygen2")
install.packages("sparklyr")
5 changes: 1 addition & 4 deletions R/sparkR-mosaic/enableMosaic.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,11 @@
enableMosaic <- function(
geometryAPI="JTS"
,indexSystem="H3"
,rasterAPI="GDAL"
){
geometry_api <- sparkR.callJStatic(x="com.databricks.labs.mosaic.core.geometry.api.GeometryAPI", methodName="apply", geometryAPI)
indexing_system <- sparkR.callJStatic(x="com.databricks.labs.mosaic.core.index.IndexSystemFactory", methodName="getIndexSystem", indexSystem)

raster_api <- sparkR.callJStatic(x="com.databricks.labs.mosaic.core.raster.api.RasterAPI", methodName="apply", rasterAPI)

mosaic_context <- sparkR.newJObject(x="com.databricks.labs.mosaic.functions.MosaicContext", indexing_system, geometry_api, raster_api)
mosaic_context <- sparkR.newJObject(x="com.databricks.labs.mosaic.functions.MosaicContext", indexing_system, geometry_api)
functions <<- sparkR.callJMethod(mosaic_context, "functions")
# register the sql functions for use in sql() commands
sparkR.callJMethod(mosaic_context, "register")
Expand Down
2 changes: 1 addition & 1 deletion R/sparkR-mosaic/sparkrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: This package extends SparkR to bring the Databricks Mosaic for geos
License: Databricks
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.0
RoxygenNote: 7.2.3
Collate:
'enableMosaic.R'
'generics.R'
Expand Down
6 changes: 2 additions & 4 deletions R/sparkR-mosaic/tests.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
repo<-"https://cran.ma.imperial.ac.uk/"

spark_location <- "/usr/spark-download/unzipped/spark-3.2.1-bin-hadoop2.7"
Sys.setenv(SPARK_HOME = spark_location)
library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))
Expand Down Expand Up @@ -66,15 +64,15 @@ sdf <- withColumn(sdf, "transformed_geom", st_transform(column("geom_with_srid")
# Grid functions
sdf <- withColumn(sdf, "grid_longlatascellid", grid_longlatascellid(lit(1), lit(1), lit(1L)))
sdf <- withColumn(sdf, "grid_pointascellid", grid_pointascellid(column("point_wkt"), lit(1L)))
sdf <- withColumn(sdf, "grid_boundaryaswkb", grid_boundaryaswkb( SparkR::cast(lit(1), "long")))
sdf <- withColumn(sdf, "grid_boundaryaswkb", grid_boundaryaswkb(column("grid_pointascellid")))
sdf <- withColumn(sdf, "grid_polyfill", grid_polyfill(column("wkt"), lit(1L)))
sdf <- withColumn(sdf, "grid_tessellateexplode", grid_tessellateexplode(column("wkt"), lit(1L)))
sdf <- withColumn(sdf, "grid_tessellate", grid_tessellate(column("wkt"), lit(1L)))

# Deprecated
sdf <- withColumn(sdf, "point_index_lonlat", point_index_lonlat(lit(1), lit(1), lit(1L)))
sdf <- withColumn(sdf, "point_index_geom", point_index_geom(column("point_wkt"), lit(1L)))
sdf <- withColumn(sdf, "index_geometry", index_geometry( SparkR::cast(lit(1), "long")))
sdf <- withColumn(sdf, "index_geometry", index_geometry(column("point_index_geom")))
sdf <- withColumn(sdf, "polyfill", polyfill(column("wkt"), lit(1L)))
sdf <- withColumn(sdf, "mosaic_explode", mosaic_explode(column("wkt"), lit(1L)))
sdf <- withColumn(sdf, "mosaicfill", mosaicfill(column("wkt"), lit(1L)))
Expand Down
2 changes: 1 addition & 1 deletion R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: This package extends sparklyr to bring the Databricks Mosaic for ge
License: Databricks
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.0
RoxygenNote: 7.2.3
Collate:
'enableMosaic.R'
'sparkFunctions.R'
Expand Down
Loading

0 comments on commit 31305ed

Please sign in to comment.