-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Pai…
…mon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency [Feature][CONNECTORS-V2-Paimon] Dynamic bucket splitting improves Paimon writing efficiency
- Loading branch information
Showing
14 changed files
with
814 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
85 changes: 85 additions & 0 deletions
85
...va/org/apache/seatunnel/connectors/seatunnel/paimon/sink/bucket/PaimonBucketAssigner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.seatunnel.connectors.seatunnel.paimon.sink.bucket; | ||
|
||
import org.apache.commons.collections.CollectionUtils; | ||
import org.apache.paimon.codegen.CodeGenUtils; | ||
import org.apache.paimon.codegen.Projection; | ||
import org.apache.paimon.crosspartition.IndexBootstrap; | ||
import org.apache.paimon.data.InternalRow; | ||
import org.apache.paimon.index.SimpleHashBucketAssigner; | ||
import org.apache.paimon.reader.RecordReader; | ||
import org.apache.paimon.reader.RecordReaderIterator; | ||
import org.apache.paimon.schema.TableSchema; | ||
import org.apache.paimon.table.FileStoreTable; | ||
import org.apache.paimon.table.Table; | ||
import org.apache.paimon.table.sink.RowPartitionKeyExtractor; | ||
|
||
import java.io.IOException; | ||
|
||
public class PaimonBucketAssigner { | ||
|
||
private final RowPartitionKeyExtractor extractor; | ||
|
||
private final Projection bucketKeyProjection; | ||
|
||
private final SimpleHashBucketAssigner simpleHashBucketAssigner; | ||
|
||
private final TableSchema schema; | ||
|
||
public PaimonBucketAssigner(Table table, int numAssigners, int assignId) { | ||
FileStoreTable fileStoreTable = (FileStoreTable) table; | ||
this.schema = fileStoreTable.schema(); | ||
this.extractor = new RowPartitionKeyExtractor(fileStoreTable.schema()); | ||
this.bucketKeyProjection = | ||
CodeGenUtils.newProjection( | ||
fileStoreTable.schema().logicalRowType(), | ||
fileStoreTable.schema().projection(fileStoreTable.schema().bucketKeys())); | ||
long dynamicBucketTargetRowNum = | ||
((FileStoreTable) table).coreOptions().dynamicBucketTargetRowNum(); | ||
this.simpleHashBucketAssigner = | ||
new SimpleHashBucketAssigner(numAssigners, assignId, dynamicBucketTargetRowNum); | ||
loadBucketIndex(fileStoreTable, numAssigners, assignId); | ||
} | ||
|
||
private void loadBucketIndex(FileStoreTable fileStoreTable, int numAssigners, int assignId) { | ||
IndexBootstrap indexBootstrap = new IndexBootstrap(fileStoreTable); | ||
try (RecordReader<InternalRow> recordReader = | ||
indexBootstrap.bootstrap(numAssigners, assignId)) { | ||
RecordReaderIterator<InternalRow> readerIterator = | ||
new RecordReaderIterator<>(recordReader); | ||
while (readerIterator.hasNext()) { | ||
InternalRow row = readerIterator.next(); | ||
assign(row); | ||
} | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
public int assign(InternalRow rowData) { | ||
int hash; | ||
if (CollectionUtils.isEmpty(this.schema.bucketKeys())) { | ||
hash = extractor.trimmedPrimaryKey(rowData).hashCode(); | ||
} else { | ||
hash = bucketKeyProjection.apply(rowData).hashCode(); | ||
} | ||
return Math.abs( | ||
this.simpleHashBucketAssigner.assign(this.extractor.partition(rowData), hash)); | ||
} | ||
} |
91 changes: 91 additions & 0 deletions
91
...rg/apache/seatunnel/connectors/seatunnel/paimon/sink/bucket/PaimonBucketAssignerTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.seatunnel.connectors.seatunnel.paimon.sink.bucket; | ||
|
||
import org.apache.paimon.catalog.Catalog; | ||
import org.apache.paimon.catalog.CatalogContext; | ||
import org.apache.paimon.catalog.CatalogFactory; | ||
import org.apache.paimon.catalog.Identifier; | ||
import org.apache.paimon.data.BinaryString; | ||
import org.apache.paimon.data.GenericRow; | ||
import org.apache.paimon.options.Options; | ||
import org.apache.paimon.schema.Schema; | ||
import org.apache.paimon.table.FileStoreTable; | ||
import org.apache.paimon.table.Table; | ||
import org.apache.paimon.table.sink.RowPartitionKeyExtractor; | ||
import org.apache.paimon.types.DataTypes; | ||
|
||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Locale; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
|
||
public class PaimonBucketAssignerTest { | ||
|
||
private Table table; | ||
private static final String TABLE_NAME = "default_table"; | ||
private static final String DATABASE_NAME = "default_database"; | ||
|
||
@BeforeEach | ||
public void before() throws Exception { | ||
boolean isWindows = | ||
System.getProperties().getProperty("os.name").toUpperCase().contains("WINDOWS"); | ||
Options options = new Options(); | ||
if (isWindows) { | ||
options.set("warehouse", "C:/Users/" + System.getProperty("user.name") + "/tmp/paimon"); | ||
} else { | ||
options.set("warehouse", "file:///tmp/paimon"); | ||
} | ||
Catalog catalog = CatalogFactory.createCatalog(CatalogContext.create(options)); | ||
catalog.createDatabase(DATABASE_NAME, true); | ||
Identifier identifier = Identifier.create(DATABASE_NAME, TABLE_NAME); | ||
if (!catalog.tableExists(identifier)) { | ||
Schema.Builder schemaBuilder = Schema.newBuilder(); | ||
schemaBuilder.column("id", DataTypes.INT(), "primary Key"); | ||
schemaBuilder.column("name", DataTypes.STRING(), "name"); | ||
schemaBuilder.primaryKey("id"); | ||
schemaBuilder.option("bucket", "-1"); | ||
schemaBuilder.option("dynamic-bucket.target-row-num", "20"); | ||
Schema schema = schemaBuilder.build(); | ||
catalog.createTable(identifier, schema, false); | ||
} | ||
table = catalog.getTable(identifier); | ||
} | ||
|
||
@Test | ||
public void bucketAssigner() { | ||
FileStoreTable fileStoreTable = (FileStoreTable) table; | ||
RowPartitionKeyExtractor keyExtractor = | ||
new RowPartitionKeyExtractor(fileStoreTable.schema()); | ||
PaimonBucketAssigner paimonBucketAssigner = new PaimonBucketAssigner(fileStoreTable, 1, 0); | ||
Map<Integer, Integer> bucketInformation = new HashMap<>(); | ||
for (int i = 0; i < 50; i++) { | ||
GenericRow row = GenericRow.of(i, BinaryString.fromString(String.valueOf(i))); | ||
int assign = paimonBucketAssigner.assign(row); | ||
int hashCode = keyExtractor.trimmedPrimaryKey(row).hashCode(); | ||
bucketInformation.put(hashCode, assign); | ||
} | ||
List<Integer> bucketSize = | ||
bucketInformation.values().stream().distinct().collect(Collectors.toList()); | ||
Assertions.assertEquals(bucketSize.size(), 3); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.