Skip to content

Commit

Permalink
GC: consider statistics files (#9898)
Browse files: browse the repository at this point in the history
(Untested) Implementation that makes GC consider referenced statistics files (and partition statistics files) as 'live'.
  • Loading branch information
snazy authored Nov 11, 2024
1 parent 912bdea commit 8731441
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ as necessary. Empty sections will not end in the release notes.

### Fixes

- GC: Consider referenced statistics (and partition statistics) files as 'live'.

### Commits

## [0.99.0] Release (2024-09-26)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.ManifestReaderUtil;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionStatisticsFile;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.StatisticsFile;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableMetadataParser;
import org.apache.iceberg.io.CloseableIterable;
Expand Down Expand Up @@ -137,11 +139,26 @@ private Stream<FileReference> extractTableFiles(ContentReference contentReferenc
Snapshot snapshot =
snapshotId < 0L ? tableMetadata.currentSnapshot() : tableMetadata.snapshot(snapshotId);

Map<Integer, PartitionSpec> specsById = tableMetadata.specsById();

Stream<StorageUri> allFiles = elementaryUrisFromSnapshot(snapshot, contentReference);

if (snapshot != null) {
long effectiveSnapshotId = snapshot.snapshotId();
allFiles =
Stream.concat(
allFiles,
tableMetadata.statisticsFiles().stream()
.filter(s -> s.snapshotId() == effectiveSnapshotId)
.map(StatisticsFile::path)
.map(StorageUri::of));
allFiles =
Stream.concat(
allFiles,
tableMetadata.partitionStatisticsFiles().stream()
.filter(s -> s.snapshotId() == effectiveSnapshotId)
.map(PartitionStatisticsFile::path)
.map(StorageUri::of));

Map<Integer, PartitionSpec> specsById = tableMetadata.specsById();
allFiles =
Stream.concat(
allFiles,
Expand Down

0 comments on commit 8731441

Please sign in to comment.