From 618c4e6d12f28f363c8c26d08ab51988a6c348a9 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Wed, 24 Apr 2024 20:30:47 -0700 Subject: [PATCH] [SEDONA-539] Support Snowflake geography type (#1355) --- docs/tutorial/snowflake/sql.md | 30 ++++++++++++---- .../sedona/snowflake/snowsql/TestBase.java | 17 +++++++++ .../snowsql/TestFunctionsGeography.java | 36 +++++++++++++++++++ .../snowsql/ddl/UDFDDLGenerator.java | 7 ++++ 4 files changed, 83 insertions(+), 7 deletions(-) create mode 100644 snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsGeography.java diff --git a/docs/tutorial/snowflake/sql.md b/docs/tutorial/snowflake/sql.md index 88847dfcdc..1f57a6aa6b 100644 --- a/docs/tutorial/snowflake/sql.md +++ b/docs/tutorial/snowflake/sql.md @@ -28,9 +28,9 @@ POINT (-122.33 47.61) Seattle POINT (-122.42 37.76) San Francisco ``` -## Create a Geometry column +## Create a Geometry/Geography column -All geometrical operations in SedonaSQL are on Geometry type objects. Therefore, before any kind of queries, you need to create a Geometry type column on the table. +All geometrical operations in SedonaSQL are on Geometry/Geography type objects. Therefore, before any kind of queries, you need to create a Geometry/Geography type column on the table. ```sql CREATE TABLE city_tbl_geom AS @@ -53,6 +53,22 @@ SELECT Sedona.ST_AsText(geom), city_name FROM city_tbl_geom ``` +Alternatively, you can also create Snowflake native Geometry and Geography type columns. 
For example, you can create a Snowflake native Geometry type column as follows (note the function has no `SEDONA` prefix): + +```sql +CREATE TABLE city_tbl_geom AS +SELECT ST_GeometryFromWKT(wkt) AS geom, city_name +FROM city_tbl +``` + +The following code creates a Snowflake native Geography type column (note the function has no `SEDONA` prefix): + +```sql +CREATE TABLE city_tbl_geom AS +SELECT ST_GeographyFromWKT(wkt) AS geom, city_name +FROM city_tbl +``` + !!!note SedonaSQL provides lots of functions to create a Geometry column, please read [SedonaSQL API](../../api/snowflake/vector-data/Constructor.md). @@ -226,7 +242,7 @@ WHERE ST_Within(pointdf.pointshape, polygondf.polygonshape) ## Distance join !!!warning - Sedona distance join in Snowflake does not trigger Sedona's optimized spatial join algorithm while Sedona Spark does. It uses Snowflake's default Cartesian join which is very slow. Therefore, it is recommended to use Sedona's S2-based join or Snowflake's native ST functions to do range join, which will trigger Snowflake's `GeoJoin` algorithm. + Sedona distance join in Snowflake does not trigger Sedona's optimized spatial join algorithm while Sedona Spark does. It uses Snowflake's default Cartesian join which is very slow. Therefore, it is recommended to use Sedona's S2-based join or Snowflake's native ST functions + native `Geography` type to do range join, which will trigger Snowflake's `GeoJoin` algorithm. Introduction: Find geometries from A and geometries from B such that the distance of each geometry pair is less or equal than a certain distance. It supports the planar Euclidean distance calculators `ST_Distance`, `ST_HausdorffDistance`, `ST_FrechetDistance` and the meter-based geodesic distance calculators `ST_DistanceSpheroid` and `ST_DistanceSphere`. @@ -358,8 +374,8 @@ You can click the links above to learn more about these functions. More function Sedona can interoperate with Snowflake native functions seamlessly. 
There are two ways to do this: -* Use Sedona functions to create a Geometry column, then use Snowflake native functions and Sedona functions to query the Geometry column. -* Use Snowflake native functions to create a Geometry column, then use Snowflake native functions and Sedona functions to query the Geometry column. +* Use `Sedona functions` to create a Geometry column, then use Snowflake native functions and Sedona functions to query it. +* Use `Snowflake native functions` to create a Geometry/Geography column, then use Snowflake native functions and Sedona functions to query it. Now we will show you how to do this. @@ -372,7 +388,7 @@ In this case, Sedona uses EWKB type as the input/output type for geometry. If yo In this example, `SEDONA.ST_X` is a Sedona function, `ST_GeometryFromWkt` and `ST_AsEWKB` are Snowflake native functions. ```sql -SELECT SEDONA.ST_X(ST_AsEWKB(ST_GeommetryFromWkt('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'))) FROM {{geometry_table}}; +SELECT SEDONA.ST_X(ST_AsEWKB(ST_GeometryFromWkt('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'))) FROM {{geometry_table}}; ``` #### From Sedona functions to Snowflake native functions @@ -391,7 +407,7 @@ Sedona geometry constructors are more powerful than Snowflake native functions. * WKB serialization is more efficient. If you need to use multiple Sedona functions, it is more efficient to use this method, which might bring in 2X performance improvement. * SRID information of geometries is preserved. The method below will lose SRID information. -### Geometries created by Snowflake Geometry constructors +### Geometry / Geography created by Snowflake Geometry / Geography constructors In this case, Sedona uses Snowflake native GEOMETRY/GEOGRAPHY type as the input/output type for geometry. The serialization format is GeoJSON string. 
diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestBase.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestBase.java index 5c830c4d63..0a6faa42ad 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestBase.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestBase.java @@ -71,6 +71,23 @@ public void registerUDF(String functionName, Class ... paramTypes) { } public void registerUDFV2(String functionName, Class ... paramTypes) { + Constants.snowflakeTypeMap.replace("Geometry", "GEOMETRY"); + try { + String ddl = UDFDDLGenerator.buildUDFDDL(UDFsV2.class.getMethod( + functionName, + paramTypes + ), buildDDLConfigs, "@ApacheSedona", false, ""); + System.out.println(ddl); + ResultSet res = snowClient.executeQuery(ddl); + res.next(); + assert res.getString(1).contains("successfully created"); + } catch (SQLException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + + public void registerUDFGeography(String functionName, Class ... paramTypes) { + Constants.snowflakeTypeMap.replace("Geometry", "GEOGRAPHY"); try { String ddl = UDFDDLGenerator.buildUDFDDL(UDFsV2.class.getMethod( functionName, diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsGeography.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsGeography.java new file mode 100644 index 0000000000..5c1bd12b8d --- /dev/null +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsGeography.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sedona.snowflake.snowsql; + +import org.junit.Test; +import org.junit.runner.RunWith; + +@RunWith(SnowTestRunner.class) +public class TestFunctionsGeography + extends TestBase { + @Test + public void test_GeometryType() { + registerUDFGeography("GeometryType", String.class); + verifySqlSingleRes( + "select sedona.GeometryType(ST_GeographyFromWKT('POINT(1 2)'))", + "POINT" + ); + } +} diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDFDDLGenerator.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDFDDLGenerator.java index ec6df3f8dc..77b7495cc0 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDFDDLGenerator.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDFDDLGenerator.java @@ -85,6 +85,13 @@ public static List buildAll(Map configs, String stageNam ddlList.add(buildUDFDDL(method, configs, stageName, isNativeApp, appRoleName)); } } + // Replace Geometry with GEOGRAPHY and generate DDL for UDFsV2 again + Constants.snowflakeTypeMap.replace("Geometry", "GEOGRAPHY"); + for (Method method : udfV2Methods()) { + if (method.getModifiers() == (Modifier.PUBLIC | Modifier.STATIC)) { + ddlList.add(buildUDFDDL(method, configs, stageName, isNativeApp, appRoleName)); + } + } return ddlList; }