From e8949e2df1151896316b1fb7e53d1a30fca9ab47 Mon Sep 17 00:00:00 2001 From: atask-g Date: Fri, 9 Aug 2024 13:39:32 -0400 Subject: [PATCH] Added support for PARTITION BY and ORDER BY in search indexes (#1776) * Added support for search indexes * Addressed review feedback * Ran mvn spotless:apply to fix formatting * Added support for search indexes * Add support for PARTITION BY * Added support for ORDER BY in search indexes * Updated SQL test * Ran mvn spotless:apply to fix formatting * Modified partitionBy and orderBy to be optional properties * Removed merge conflict --- .../cloud/teleport/spanner/ddl/Index.java | 32 +++++++++++++++++++ .../spanner/ddl/InformationSchemaScanner.java | 14 +++++++- .../teleport/spanner/ImportPipelineIT.java | 6 ---- .../ddl/InformationSchemaScannerIT.java | 22 +++++++++---- .../ddl/InformationSchemaScannerTest.java | 2 +- 5 files changed, 62 insertions(+), 14 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Index.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Index.java index 6bfe3dcf2b..71ff428f10 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Index.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Index.java @@ -57,6 +57,12 @@ public abstract class Index implements Serializable { @Nullable abstract String type(); + @Nullable + abstract ImmutableList<String> partitionBy(); + + @Nullable + abstract ImmutableList<String> orderBy(); + public static Builder builder(Dialect dialect) { return new AutoValue_Index.Builder().dialect(dialect).nullFiltered(false).unique(false); } @@ -148,6 +154,28 @@ private void prettyPrintGsql(Appendable appendable) throws IOException { appendable.append(" STORING (").append(storingString).append(")"); } + if (partitionBy() != null) { + String partitionByString = + partitionBy().stream() + .map(c -> quoteIdentifier(c, dialect())) + .collect(Collectors.joining(",")); + + if (!partitionByString.isEmpty()) { + appendable.append(" PARTITION BY 
").append(partitionByString); + } + } + + if (orderBy() != null) { + String orderByString = + orderBy().stream() + .map(c -> quoteIdentifier(c, dialect())) + .collect(Collectors.joining(",")); + + if (!orderByString.isEmpty()) { + appendable.append(" ORDER BY ").append(orderByString); + } + } + if (interleaveIn() != null) { appendable.append(", INTERLEAVE IN ").append(quoteIdentifier(interleaveIn(), dialect())); } @@ -224,6 +252,10 @@ public Builder nullFiltered() { public abstract Builder type(String type); + public abstract Builder partitionBy(ImmutableList<String> keys); + + public abstract Builder orderBy(ImmutableList<String> keys); + abstract Index autoBuild(); public Index build() { diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java index bf4244dda5..bca83beccc 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java @@ -385,6 +385,16 @@ private void listIndexes(Map<String, NavigableMap<String, Index.Builder>> indexe ? resultSet.getString(6) : null; + ImmutableList<String> searchPartitionBy = + (dialect == Dialect.GOOGLE_STANDARD_SQL && !resultSet.isNull(7)) + ? ImmutableList.<String>builder().addAll(resultSet.getStringList(7)).build() + : null; + + ImmutableList<String> searchOrderBy = + (dialect == Dialect.GOOGLE_STANDARD_SQL && !resultSet.isNull(8)) + ? 
ImmutableList.<String>builder().addAll(resultSet.getStringList(8)).build() + : null; + Map<String, Index.Builder> tableIndexes = indexes.computeIfAbsent(tableName, k -> Maps.newTreeMap()); @@ -397,6 +407,8 @@ private void listIndexes(Map<String, NavigableMap<String, Index.Builder>> indexe .nullFiltered(nullFiltered) .interleaveIn(parent) .type(type) + .partitionBy(searchPartitionBy) + .orderBy(searchOrderBy) .filter(filter)); } } @@ -407,7 +419,7 @@ Statement listIndexesSQL() { case GOOGLE_STANDARD_SQL: return Statement.of( "SELECT t.table_schema, t.table_name, t.index_name, t.parent_table_name, t.is_unique," - + " t.is_null_filtered, t.index_type" + + " t.is_null_filtered, t.index_type, t.search_partition_by, t.search_order_by" + " FROM information_schema.indexes AS t" + " WHERE t.table_schema NOT IN" + " ('INFORMATION_SCHEMA', 'SPANNER_SYS') AND" diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java index e2c2db2862..da8784de4b 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java @@ -170,12 +170,6 @@ private void testGoogleSqlImportPipelineBase( + ") PRIMARY KEY(Key)"; spannerResourceManager.executeDdlStatement(createFloat32TableStatement); - String createSearchIndexStatement = - "CREATE SEARCH INDEX `SearchIndex`\n" - + " ON `Singers`(`MyTokens` ASC)\n" - + " OPTIONS (sort_order_sharding=TRUE)"; - spannerResourceManager.executeDdlStatement(createSearchIndexStatement); - PipelineLauncher.LaunchConfig.Builder options = paramsAdder.apply( PipelineLauncher.LaunchConfig.builder(testName, specPath) diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java index eefea33109..5983520230 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java +++ 
b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java @@ -573,12 +573,22 @@ public void searchIndexes() throws Exception { // Prefix indexes to ensure ordering. List<String> statements = Arrays.asList( - "CREATE TABLE `Base` (" - + " `MyKey` INT64 NOT NULL," - + " `MyData` STRING(MAX)," - + " `MyTokens` TOKENLIST AS (TOKENIZE_FULLTEXT(MyData)) HIDDEN," - + " ) PRIMARY KEY (`MyKey` ASC)", - " CREATE SEARCH INDEX `SearchIndex` ON `Base`(`MyTokens` ASC)" + "CREATE TABLE `Users` (" + + " `UserId` INT64 NOT NULL," + + " ) PRIMARY KEY (`UserId` ASC)", + " CREATE TABLE `Messages` (" + + " `UserId` INT64 NOT NULL," + + " `MessageId` INT64 NOT NULL," + + " `Subject` STRING(MAX)," + + " `Subject_Tokens` TOKENLIST AS (TOKENIZE_FULLTEXT(`Subject`)) HIDDEN," + + " `Body` STRING(MAX)," + + " `Body_Tokens` TOKENLIST AS (TOKENIZE_FULLTEXT(`Body`)) HIDDEN," + + " `Data` STRING(MAX)," + + " ) PRIMARY KEY (`UserId` ASC, `MessageId` ASC), INTERLEAVE IN PARENT `Users`", + " CREATE SEARCH INDEX `SearchIndex` ON `Messages`(`Subject_Tokens` ASC, `Body_Tokens` ASC)" + + " STORING (`Data`)" + + " PARTITION BY `UserId`," + + " INTERLEAVE IN `Users`" + " OPTIONS (sort_order_sharding=TRUE)"); spannerServer.createDatabase(dbId, statements); diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerTest.java index 2faf1380f9..6c33af0356 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerTest.java @@ -71,7 +71,7 @@ public void testListIndexesSQL() { googleSQLInfoScanner.listIndexesSQL().getSql(), equalToCompressingWhiteSpace( "SELECT t.table_schema, t.table_name, t.index_name, t.parent_table_name, t.is_unique," - + " t.is_null_filtered, t.index_type" + + " t.is_null_filtered, t.index_type, t.search_partition_by, 
t.search_order_by" + " FROM information_schema.indexes AS t" + " WHERE t.table_schema NOT IN" + " ('INFORMATION_SCHEMA', 'SPANNER_SYS') AND"