Skip to content

Commit

Permalink
[flink] Compact procedure supports named arguments (#3078)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuzelin authored Mar 25, 2024
1 parent 419d354 commit d6d2929
Show file tree
Hide file tree
Showing 10 changed files with 440 additions and 147 deletions.
33 changes: 18 additions & 15 deletions docs/content/engines/flink.md
Original file line number Diff line number Diff line change
Expand Up @@ -324,17 +324,23 @@ SELECT * FROM T;

Flink 1.18 and later versions support [Call Statements](https://nightlies.apache.org/flink/flink-docs-master/docs/dev/table/sql/call/),
which make it easier to manipulate data and metadata of Paimon table by writing SQLs instead of submitting Flink jobs.
All available procedures are listed below. Note that when you call a procedure, you must pass all parameters in order,
and if you don't want to pass some parameters, you must use `''` as placeholder. For example, if you want to compact
table `default.t` with parallelism 4, but you don't want to specify partitions and sort strategy, the call statement
should be \

In Flink 1.18, procedures only support passing arguments by position. You must pass all arguments in order, and if you
don't want to pass an argument, you must use `''` as a placeholder. For example, if you want to compact table `default.t`
with parallelism 4, but you don't want to specify partitions or a sort strategy, the call statement should be \
`CALL sys.compact('default.t', '', '', '', 'sink.parallelism=4')`.

In later versions, procedures also support passing arguments by name. You can pass arguments in any order, and any optional
argument can be omitted. For the above example, the call statement is \
``CALL sys.compact(`table` => 'default.t', options => 'sink.parallelism=4')``.

Specify partitions: a string is used to represent the partition filter. "," means "AND" and ";" means "OR". For example, if you want
to specify two partitions date=01 and date=02, you need to write 'date=01;date=02'; if you want to specify one partition
with date=01 and day=01, you need to write 'date=01,day=01'.

table options syntax: we use string to represent table options. The format is 'key1=value1,key2=value2...'.
Table options syntax: a string is used to represent table options. The format is 'key1=value1,key2=value2...'.

All available procedures are listed below.

<table class="table table-bordered">
<thead>
Expand All @@ -349,20 +355,17 @@ table options syntax: we use string to represent table options. The format is 'k
<tr>
<td>compact</td>
<td>
CALL [catalog.]sys.compact('identifier') <br/><br/>
CALL [catalog.]sys.compact('identifier', 'partitions') <br/><br/>
CALL [catalog.]sys.compact('identifier', 'partitions', 'order_strategy', 'order_columns', 'table_options')
</td>
<td>
TO compact a table. Arguments:
<li>identifier: the target table identifier. Cannot be empty.</li>
<li>partitions: partition filter.</li>
<li>order_strategy: 'order' or 'zorder' or 'hilbert' or 'none'. Left empty for 'none'.</li>
<li>order_columns: the columns need to be sort. Left empty if 'order_strategy' is 'none'.</li>
<li>table_options: additional dynamic options of the table.</li>
To compact a table. Arguments:
<li>table(required): the target table identifier.</li>
<li>partitions(optional): partition filter.</li>
<li>order_strategy(optional): 'order' or 'zorder' or 'hilbert' or 'none'.</li>
<li>order_by(optional): the columns to sort by. Leave it empty if 'order_strategy' is 'none'.</li>
<li>options(optional): additional dynamic options of the table.</li>
</td>
<td>
CALL sys.compact('default.T', 'p=0', 'zorder', 'a,b', 'sink.parallelism=4')
CALL sys.compact(`table` => 'default.T', partitions => 'p=0', order_strategy => 'zorder', order_by => 'a,b', options => 'sink.parallelism=4')
</td>
</tr>
<tr>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,16 @@ public void testCompact() throws Exception {
testDataSourceDdl);

// execute compact procedure
// Per the engine docs, Flink 1.18 procedures only accept positional arguments, while later
// versions accept named arguments. Constant-first equals() is a plain equality check and is
// null-safe: an unset property selects the named-argument form instead of throwing a bare
// NullPointerException.
String flinkMainVersion = System.getProperty("test.flink.main.version");
String callStatement;
if ("1.18".equals(flinkMainVersion)) {
    callStatement = "CALL sys.compact('default.ts_table', 'dt=20221205;dt=20221206');";
} else {
    // The backticks around `table` are backslash-escaped in the statement text
    // (presumably because the statement is passed through a shell -- confirm against runSql).
    callStatement =
            "CALL sys.compact(\\`table\\` => 'default.ts_table', partitions => 'dt=20221205;dt=20221206');";
}

runSql(
"SET 'execution.checkpointing.interval' = '1s';\n"
+ "CALL sys.compact('default.ts_table', 'dt=20221205;dt=20221206');",
"SET 'execution.checkpointing.interval' = '1s';\n" + callStatement,
catalogDdl,
useCatalogCmd);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.flink.procedure;

import org.apache.paimon.catalog.AbstractCatalog;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.flink.action.CompactAction;
import org.apache.paimon.flink.action.SortCompactAction;
import org.apache.paimon.utils.ParameterUtils;
import org.apache.paimon.utils.StringUtils;

import org.apache.flink.table.procedure.ProcedureContext;

import java.util.Collections;
import java.util.Map;

/**
* Stay compatible with 1.18 procedure which doesn't support named argument. Usage:
*
* <pre><code>
* -- NOTE: use '' as placeholder for optional arguments
*
* -- compact a table (tableId should be 'database_name.table_name')
* CALL sys.compact('tableId')
*
* -- compact specific partitions ('pt1=A,pt2=a;pt1=B,pt2=b', ...)
* CALL sys.compact('tableId', 'pt1=A,pt2=a;pt1=B,pt2=b')
*
* -- compact a table with sorting
* CALL sys.compact('tableId', 'partitions', 'ORDER/ZORDER', 'col1,col2', 'sink.parallelism=6')
*
* </code></pre>
*/
public class CompactProcedure extends ProcedureBase {

    public static final String IDENTIFIER = "compact";

    /**
     * Compacts a table without a partition filter, sort strategy or extra table options.
     *
     * @param procedureContext the Flink procedure context
     * @param tableId the target table identifier, parsed by {@link Identifier#fromString}
     * @return the result of the submitted compact job
     * @throws Exception if the job cannot be built or executed
     */
    public String[] call(ProcedureContext procedureContext, String tableId) throws Exception {
        return call(procedureContext, tableId, "");
    }

    /**
     * Compacts the given partitions of a table without sorting.
     *
     * @param procedureContext the Flink procedure context
     * @param tableId the target table identifier, parsed by {@link Identifier#fromString}
     * @param partitions partition filter, entries separated by ';'; blank means no filter
     * @return the result of the submitted compact job
     * @throws Exception if the job cannot be built or executed
     */
    public String[] call(ProcedureContext procedureContext, String tableId, String partitions)
            throws Exception {
        return call(procedureContext, tableId, partitions, "", "", "");
    }

    /**
     * Compacts a table, optionally sorting it, without extra table options.
     *
     * @param procedureContext the Flink procedure context
     * @param tableId the target table identifier, parsed by {@link Identifier#fromString}
     * @param partitions partition filter, entries separated by ';'; blank means no filter
     * @param orderStrategy sort strategy; blank means no sorting
     * @param orderByColumns comma-separated sort columns; blank means no sorting
     * @return the result of the submitted compact job
     * @throws Exception if the job cannot be built or executed
     */
    public String[] call(
            ProcedureContext procedureContext,
            String tableId,
            String partitions,
            String orderStrategy,
            String orderByColumns)
            throws Exception {
        return call(procedureContext, tableId, partitions, orderStrategy, orderByColumns, "");
    }

    /**
     * Builds and executes the compact job. All other overloads delegate to this method.
     *
     * <p>A {@link CompactAction} is used when neither a sort strategy nor sort columns are
     * given, and a {@link SortCompactAction} when both are given. Blank (null, empty or
     * whitespace-only) values count as "not given", which matches the named-argument
     * {@code CompactProcedure} and makes {@code ''} placeholders tolerant of stray whitespace.
     *
     * @param procedureContext the Flink procedure context
     * @param tableId the target table identifier, parsed by {@link Identifier#fromString}
     * @param partitions partition filter, entries separated by ';'; blank means no filter
     * @param orderStrategy sort strategy; must be given together with {@code orderByColumns}
     * @param orderByColumns comma-separated sort columns; must be given together with {@code
     *     orderStrategy}
     * @param tableOptions comma-separated {@code key=value} dynamic table options; blank means
     *     none
     * @return the result of the submitted compact job
     * @throws IllegalArgumentException if only one of {@code orderStrategy} and {@code
     *     orderByColumns} is given
     * @throws Exception if the job cannot be built or executed
     */
    public String[] call(
            ProcedureContext procedureContext,
            String tableId,
            String partitions,
            String orderStrategy,
            String orderByColumns,
            String tableOptions)
            throws Exception {
        String warehouse = ((AbstractCatalog) catalog).warehouse();
        Map<String, String> catalogOptions = ((AbstractCatalog) catalog).options();
        Map<String, String> tableConf =
                StringUtils.isBlank(tableOptions)
                        ? Collections.emptyMap()
                        : ParameterUtils.parseCommaSeparatedKeyValues(tableOptions);
        Identifier identifier = Identifier.fromString(tableId);

        // isBlank instead of isEmpty: null-safe, and whitespace-only placeholders are treated
        // as unspecified rather than as a real sort strategy or column list.
        boolean strategyGiven = !StringUtils.isBlank(orderStrategy);
        boolean columnsGiven = !StringUtils.isBlank(orderByColumns);

        CompactAction action;
        String jobName;
        if (!strategyGiven && !columnsGiven) {
            action =
                    new CompactAction(
                            warehouse,
                            identifier.getDatabaseName(),
                            identifier.getObjectName(),
                            catalogOptions,
                            tableConf);
            jobName = "Compact Job";
        } else if (strategyGiven && columnsGiven) {
            action =
                    new SortCompactAction(
                                    warehouse,
                                    identifier.getDatabaseName(),
                                    identifier.getObjectName(),
                                    catalogOptions,
                                    tableConf)
                            .withOrderStrategy(orderStrategy)
                            .withOrderColumns(orderByColumns.split(","));
            jobName = "Sort Compact Job";
        } else {
            // Exactly one of the two sort arguments was supplied.
            throw new IllegalArgumentException(
                    "You must specify 'order strategy' and 'order by columns' both.");
        }

        if (!StringUtils.isBlank(partitions)) {
            action.withPartitions(ParameterUtils.getPartitions(partitions.split(";")));
        }

        return execute(procedureContext, action, jobName);
    }

    @Override
    public String identifier() {
        return IDENTIFIER;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.flink.procedure;

import org.apache.paimon.flink.CatalogITCaseBase;

import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThatCode;

/**
 * Verifies that the legacy, overloaded {@code CALL} forms taking positional arguments can still
 * be invoked.
 */
public class ProcedurePositionalArgumentsITCase extends CatalogITCaseBase {

    @Test
    public void testCallCompact() {
        String createTable =
                "CREATE TABLE T ("
                        + " k INT,"
                        + " v INT,"
                        + " pt INT,"
                        + " PRIMARY KEY (k, pt) NOT ENFORCED"
                        + ") PARTITIONED BY (pt) WITH ("
                        + " 'write-only' = 'true',"
                        + " 'bucket' = '1'"
                        + ")";
        sql(createTable);

        // One statement per positional overload; they run in order and each must succeed.
        String[] positionalCalls = {
            "CALL sys.compact('default.T')",
            "CALL sys.compact('default.T', 'pt=1')",
            "CALL sys.compact('default.T', 'pt=1', '', '')",
            "CALL sys.compact('default.T', '', '', '', 'sink.parallelism=1')"
        };
        for (String statement : positionalCalls) {
            assertThatCode(() -> sql(statement)).doesNotThrowAnyException();
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,54 +22,38 @@
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.flink.action.CompactAction;
import org.apache.paimon.flink.action.SortCompactAction;
import org.apache.paimon.utils.ParameterUtils;
import org.apache.paimon.utils.StringUtils;

import org.apache.flink.table.annotation.ArgumentHint;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.ProcedureHint;
import org.apache.flink.table.procedure.ProcedureContext;

import java.util.Collections;
import java.util.Map;

/**
* Compact procedure. Usage:
*
* <pre><code>
* -- NOTE: use '' as placeholder for optional arguments
*
* -- compact a table (tableId should be 'database_name.table_name')
* CALL sys.compact('tableId')
*
* -- compact specific partitions ('pt1=A,pt2=a;pt1=B,pt2=b', ...)
* CALL sys.compact('tableId', 'pt1=A,pt2=a;pt1=B,pt2=b')
*
* -- compact a table with sorting
* CALL sys.compact('tableId', 'partitions', 'ORDER/ZORDER', 'col1,col2', 'sink.parallelism=6')
*
* </code></pre>
*/
import static org.apache.paimon.utils.ParameterUtils.getPartitions;
import static org.apache.paimon.utils.ParameterUtils.parseCommaSeparatedKeyValues;
import static org.apache.paimon.utils.StringUtils.isBlank;

/** Compact procedure. */
public class CompactProcedure extends ProcedureBase {

public static final String IDENTIFIER = "compact";

public String[] call(ProcedureContext procedureContext, String tableId) throws Exception {
return call(procedureContext, tableId, "");
}

public String[] call(ProcedureContext procedureContext, String tableId, String partitions)
throws Exception {
return call(procedureContext, tableId, partitions, "", "", "");
}

public String[] call(
ProcedureContext procedureContext,
String tableId,
String partitions,
String orderStrategy,
String orderByColumns)
throws Exception {
return call(procedureContext, tableId, partitions, orderStrategy, orderByColumns, "");
}

@ProcedureHint(
argument = {
@ArgumentHint(name = "table", type = @DataTypeHint("STRING")),
@ArgumentHint(
name = "partitions",
type = @DataTypeHint("STRING"),
isOptional = true),
@ArgumentHint(
name = "order_strategy",
type = @DataTypeHint("STRING"),
isOptional = true),
@ArgumentHint(name = "order_by", type = @DataTypeHint("STRING"), isOptional = true),
@ArgumentHint(name = "options", type = @DataTypeHint("STRING"), isOptional = true)
})
public String[] call(
ProcedureContext procedureContext,
String tableId,
Expand All @@ -81,13 +65,13 @@ public String[] call(
String warehouse = ((AbstractCatalog) catalog).warehouse();
Map<String, String> catalogOptions = ((AbstractCatalog) catalog).options();
Map<String, String> tableConf =
StringUtils.isBlank(tableOptions)
isBlank(tableOptions)
? Collections.emptyMap()
: ParameterUtils.parseCommaSeparatedKeyValues(tableOptions);
: parseCommaSeparatedKeyValues(tableOptions);
Identifier identifier = Identifier.fromString(tableId);
CompactAction action;
String jobName;
if (orderStrategy.isEmpty() && orderByColumns.isEmpty()) {
if (isBlank(orderStrategy) && isBlank(orderByColumns)) {
action =
new CompactAction(
warehouse,
Expand All @@ -96,7 +80,7 @@ public String[] call(
catalogOptions,
tableConf);
jobName = "Compact Job";
} else if (!orderStrategy.isEmpty() && !orderByColumns.isEmpty()) {
} else if (!isBlank(orderStrategy) && !isBlank(orderByColumns)) {
action =
new SortCompactAction(
warehouse,
Expand All @@ -112,8 +96,8 @@ public String[] call(
"You must specify 'order strategy' and 'order by columns' both.");
}

if (!(StringUtils.isBlank(partitions))) {
action.withPartitions(ParameterUtils.getPartitions(partitions.split(";")));
if (!(isBlank(partitions))) {
action.withPartitions(getPartitions(partitions.split(";")));
}

return execute(procedureContext, action, jobName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import org.apache.paimon.flink.util.AbstractTestBase;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.table.Table;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.utils.BlockingIterator;
import org.apache.paimon.utils.SnapshotManager;

Expand Down Expand Up @@ -183,10 +183,11 @@ protected CatalogTable table(String tableName) throws TableNotExistException {
return (CatalogTable) table;
}

protected Table paimonTable(String tableName)
protected FileStoreTable paimonTable(String tableName)
throws org.apache.paimon.catalog.Catalog.TableNotExistException {
org.apache.paimon.catalog.Catalog catalog = flinkCatalog().catalog();
return catalog.getTable(Identifier.create(tEnv.getCurrentDatabase(), tableName));
return (FileStoreTable)
catalog.getTable(Identifier.create(tEnv.getCurrentDatabase(), tableName));
}

private FlinkCatalog flinkCatalog() {
Expand Down
Loading

0 comments on commit d6d2929

Please sign in to comment.