From 530b7001a65e3b4a0afedd0b90b2596c8b86ed48 Mon Sep 17 00:00:00 2001 From: 7hong Date: Fri, 6 Dec 2024 15:30:08 +0800 Subject: [PATCH] [ARORO-3289] Avoid calling getMixedTablePartitionSpecById in the scan loop (#3290) Add getSpec to TableFileScanHelper trigger CI Add getSpec to TableFileScanHelper --- .../optimizing/plan/AbstractOptimizingEvaluator.java | 5 +---- .../optimizing/scan/IcebergTableFileScanHelper.java | 10 ++++++++++ .../optimizing/scan/KeyedTableFileScanHelper.java | 11 +++++++++++ .../amoro/optimizing/scan/TableFileScanHelper.java | 3 +++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/plan/AbstractOptimizingEvaluator.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/plan/AbstractOptimizingEvaluator.java index 2fc8e399d5..abd30b002f 100644 --- a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/plan/AbstractOptimizingEvaluator.java +++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/plan/AbstractOptimizingEvaluator.java @@ -32,7 +32,6 @@ import org.apache.amoro.table.KeyedTableSnapshot; import org.apache.amoro.table.MixedTable; import org.apache.amoro.table.TableSnapshot; -import org.apache.amoro.utils.MixedTableUtil; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SnapshotSummary; @@ -121,9 +120,7 @@ private void initPartitionPlans(TableFileScanHelper tableFileScanHelper) { try (CloseableIterable results = tableFileScanHelper.scan()) { for (TableFileScanHelper.FileScanResult fileScanResult : results) { - PartitionSpec partitionSpec = - MixedTableUtil.getMixedTablePartitionSpecById( - mixedTable, fileScanResult.file().specId()); + PartitionSpec partitionSpec = tableFileScanHelper.getSpec(fileScanResult.file().specId()); StructLike partition = fileScanResult.file().partition(); String partitionPath = partitionSpec.partitionToPath(partition); PartitionEvaluator evaluator = diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/IcebergTableFileScanHelper.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/IcebergTableFileScanHelper.java index 856a89ebc1..de85e9a451 100644 --- a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/IcebergTableFileScanHelper.java +++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/IcebergTableFileScanHelper.java @@ -22,19 +22,24 @@ import org.apache.amoro.shade.guava32.com.google.common.collect.Lists; import org.apache.amoro.utils.IcebergThreadPools; import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.io.CloseableIterable; +import java.util.Map; + public class IcebergTableFileScanHelper implements TableFileScanHelper { private final Table table; private Expression partitionFilter = Expressions.alwaysTrue(); private final long snapshotId; + private final Map specs; public IcebergTableFileScanHelper(Table table, long snapshotId) { this.table = table; this.snapshotId = snapshotId; + this.specs = table.specs(); } @Override @@ -61,4 +66,9 @@ public TableFileScanHelper withPartitionFilter(Expression partitionFilter) { this.partitionFilter = partitionFilter; return this; } + + @Override + public PartitionSpec getSpec(int specId) { + return specs.get(specId); + } } diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/KeyedTableFileScanHelper.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/KeyedTableFileScanHelper.java index 30f28ee6c1..6327862409 100644 --- a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/KeyedTableFileScanHelper.java +++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/KeyedTableFileScanHelper.java @@ -68,11 +68,13 @@ public class KeyedTableFileScanHelper implements TableFileScanHelper { private final long changeSnapshotId; private final long baseSnapshotId; private Expression partitionFilter = Expressions.alwaysTrue(); + private final PartitionSpec spec; public KeyedTableFileScanHelper(KeyedTable keyedTable, KeyedTableSnapshot snapshot) { this.keyedTable = keyedTable; this.baseSnapshotId = snapshot.baseSnapshotId(); this.changeSnapshotId = snapshot.changeSnapshotId(); + this.spec = keyedTable.spec(); } /** @@ -441,4 +443,13 @@ public void setMinTransactionIdAfter(long minTransactionIdAfter) { this.minTransactionIdAfter = minTransactionIdAfter; } } + + @Override + public PartitionSpec getSpec(int specId) { + if (specId != spec.specId()) { + throw new IllegalArgumentException( + "Partition spec id " + specId + " not found in table " + keyedTable.name()); + } + return spec; + } } diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/TableFileScanHelper.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/TableFileScanHelper.java index cfcdb26591..962cbd24fa 100644 --- a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/TableFileScanHelper.java +++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/scan/TableFileScanHelper.java @@ -20,6 +20,7 @@ import org.apache.iceberg.ContentFile; import org.apache.iceberg.DataFile; +import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.io.CloseableIterable; @@ -47,4 +48,6 @@ public List> deleteFiles() { CloseableIterable scan(); TableFileScanHelper withPartitionFilter(Expression partitionFilter); + + PartitionSpec getSpec(int specId); }